Skip to main content

syster/parser/
lexer.rs

1//! Logos-based lexer for SysML v2
2//!
3//! Fast tokenization using the logos crate.
4
5use super::syntax_kind::SyntaxKind;
6use logos::Logos;
7use rowan::TextSize;
8
9/// A token with its kind, text, and position
10#[derive(Debug, Clone, PartialEq, Eq)]
11pub struct Token<'a> {
12    pub kind: SyntaxKind,
13    pub text: &'a str,
14    pub offset: TextSize,
15}
16
17/// Lexer wrapping the logos-generated tokenizer
18pub struct Lexer<'a> {
19    inner: logos::Lexer<'a, LogosToken>,
20    offset: u32,
21}
22
23impl<'a> Lexer<'a> {
24    pub fn new(input: &'a str) -> Self {
25        Self {
26            inner: LogosToken::lexer(input),
27            offset: 0,
28        }
29    }
30}
31
32impl<'a> Iterator for Lexer<'a> {
33    type Item = Token<'a>;
34
35    fn next(&mut self) -> Option<Self::Item> {
36        let logos_token = self.inner.next()?;
37        let text = self.inner.slice();
38        let offset = TextSize::new(self.offset);
39        self.offset += text.len() as u32;
40
41        let kind = match logos_token {
42            Ok(t) => t.into(),
43            Err(()) => SyntaxKind::ERROR,
44        };
45
46        Some(Token { kind, text, offset })
47    }
48}
49
50/// Tokenize an entire string into a Vec
51#[allow(dead_code)]
52pub fn tokenize(input: &str) -> Vec<Token<'_>> {
53    Lexer::new(input).collect()
54}
55
56/// Logos token enum - maps to SyntaxKind
57#[derive(Logos, Debug, Clone, Copy, PartialEq)]
58#[logos(skip r"")] // Don't skip anything, we want all tokens
59pub enum LogosToken {
60    // =========================================================================
61    // TRIVIA
62    // =========================================================================
63    #[regex(r"[ \t\r\n]+")]
64    Whitespace,
65
66    #[regex(r"//[^\n]*")]
67    LineComment,
68
69    #[regex(r"/\*([^*]|\*[^/])*\*/")]
70    BlockComment,
71
72    // =========================================================================
73    // LITERALS
74    // =========================================================================
75    // Unicode-aware identifier: starts with letter or underscore, followed by letters, numbers, or underscores
76    // \p{L} matches any Unicode letter (Latin, Greek, Cyrillic, etc.)
77    // \p{N} matches any Unicode numeric character
78    #[regex(r"[\p{L}_][\p{L}\p{N}_]*")]
79    Ident,
80
81    // Unrestricted name: single-quoted string like 'My Name' or '+'
82    // Can contain any characters except single quotes
83    #[regex(r"'[^']*'")]
84    UnrestrictedName,
85
86    #[regex(r"[0-9]+")]
87    Integer,
88
89    // Decimal/real number: supports optional decimal point and optional exponent
90    // Examples: 1.0, .5, 1E-24, 1.5e+10, 3.14E6
91    #[regex(r"[0-9]*\.[0-9]+([eE][+-]?[0-9]+)?|[0-9]+[eE][+-]?[0-9]+")]
92    Decimal,
93
94    #[regex(r#""([^"\\]|\\.)*""#)]
95    String,
96
97    // =========================================================================
98    // MULTI-CHARACTER PUNCTUATION (must come before single-char)
99    // =========================================================================
100    #[token("::>")]
101    ColonColonGt,
102
103    #[token(":>>")]
104    ColonGtGt,
105
106    #[token(":>")]
107    ColonGt,
108
109    #[token("::")]
110    ColonColon,
111
112    #[token(":=")]
113    ColonEq,
114
115    #[token("..")]
116    DotDot,
117
118    #[token("===")]
119    EqEqEq,
120
121    #[token("!==")]
122    BangEqEq,
123
124    #[token("==")]
125    EqEq,
126
127    #[token("!=")]
128    BangEq,
129
130    #[token("<=")]
131    LtEq,
132
133    #[token(">=")]
134    GtEq,
135
136    #[token("->")]
137    Arrow,
138
139    #[token("=>")]
140    FatArrow,
141
142    #[token("@@")]
143    AtAt,
144
145    #[token("**")]
146    StarStar,
147
148    #[token("??")]
149    QuestionQuestion,
150
151    #[token("&&")]
152    AmpAmp,
153
154    #[token("||")]
155    PipePipe,
156
157    // =========================================================================
158    // SINGLE-CHARACTER PUNCTUATION
159    // =========================================================================
160    #[token("{")]
161    LBrace,
162    #[token("}")]
163    RBrace,
164    #[token("[")]
165    LBracket,
166    #[token("]")]
167    RBracket,
168    #[token("(")]
169    LParen,
170    #[token(")")]
171    RParen,
172    #[token(";")]
173    Semicolon,
174    #[token(":")]
175    Colon,
176    #[token(".")]
177    Dot,
178    #[token(",")]
179    Comma,
180    #[token("=")]
181    Eq,
182    #[token("<")]
183    Lt,
184    #[token(">")]
185    Gt,
186    #[token("@")]
187    At,
188    #[token("#")]
189    Hash,
190    #[token("*")]
191    Star,
192    #[token("+")]
193    Plus,
194    #[token("-")]
195    Minus,
196    #[token("/")]
197    Slash,
198    #[token("%")]
199    Percent,
200    #[token("^")]
201    Caret,
202    #[token("~")]
203    Tilde,
204    #[token("?")]
205    Question,
206    #[token("!")]
207    Bang,
208    #[token("|")]
209    Pipe,
210    #[token("&")]
211    Amp,
212
213    // =========================================================================
214    // KEYWORDS (alphabetical, longest match wins in logos)
215    // =========================================================================
216    #[token("about")]
217    AboutKw,
218    #[token("abstract")]
219    AbstractKw,
220    #[token("accept")]
221    AcceptKw,
222    #[token("action")]
223    ActionKw,
224    #[token("actor")]
225    ActorKw,
226    #[token("after")]
227    AfterKw,
228    #[token("alias")]
229    AliasKw,
230    #[token("all")]
231    AllKw,
232    #[token("allocation")]
233    AllocationKw,
234    #[token("allocate")]
235    AllocateKw,
236    #[token("analysis")]
237    AnalysisKw,
238    #[token("and")]
239    AndKw,
240    #[token("as")]
241    AsKw,
242    #[token("assert")]
243    AssertKw,
244    #[token("assign")]
245    AssignKw,
246    #[token("assoc")]
247    AssocKw,
248    #[token("assume")]
249    AssumeKw,
250    #[token("at")]
251    AtKw,
252    #[token("attribute")]
253    AttributeKw,
254    #[token("behavior")]
255    BehaviorKw,
256    #[token("bind")]
257    BindKw,
258    #[token("binding")]
259    BindingKw,
260    #[token("by")]
261    ByKw,
262    #[token("calc")]
263    CalcKw,
264    #[token("case")]
265    CaseKw,
266    #[token("chains")]
267    ChainsKw,
268    #[token("class")]
269    ClassKw,
270    #[token("classifier")]
271    ClassifierKw,
272    #[token("comment")]
273    CommentKw,
274    #[token("composite")]
275    CompositeKw,
276    #[token("concern")]
277    ConcernKw,
278    #[token("connect")]
279    ConnectKw,
280    #[token("connection")]
281    ConnectionKw,
282    #[token("connector")]
283    ConnectorKw,
284    #[token("constant")]
285    ConstantKw,
286    #[token("constraint")]
287    ConstraintKw,
288    #[token("conjugates")]
289    ConjugatesKw,
290    #[token("crosses")]
291    CrossesKw,
292    #[token("datatype")]
293    DatatypeKw,
294    #[token("decide")]
295    DecideKw,
296    #[token("def")]
297    DefKw,
298    #[token("default")]
299    DefaultKw,
300    #[token("defined")]
301    DefinedKw,
302    #[token("dependency")]
303    DependencyKw,
304    #[token("derived")]
305    DerivedKw,
306    #[token("differs")]
307    DiffersKw,
308    #[token("disjoint")]
309    DisjointKw,
310    #[token("disjoining")]
311    DisjoiningKw,
312    #[token("do")]
313    DoKw,
314    #[token("doc")]
315    DocKw,
316    #[token("done")]
317    DoneKw,
318    #[token("else")]
319    ElseKw,
320    #[token("end")]
321    EndKw,
322    #[token("entry")]
323    EntryKw,
324    #[token("enum")]
325    EnumKw,
326    #[token("enumeration")]
327    EnumerationKw,
328    #[token("exhibit")]
329    ExhibitKw,
330    #[token("exit")]
331    ExitKw,
332    #[token("expose")]
333    ExposeKw,
334    #[token("event")]
335    EventKw,
336    #[token("expr")]
337    ExprKw,
338    #[token("false")]
339    FalseKw,
340    #[token("feature")]
341    FeatureKw,
342    #[token("filter")]
343    FilterKw,
344    #[token("first")]
345    FirstKw,
346    #[token("flow")]
347    FlowKw,
348    #[token("for")]
349    ForKw,
350    #[token("fork")]
351    ForkKw,
352    #[token("frame")]
353    FrameKw,
354    #[token("from")]
355    FromKw,
356    #[token("function")]
357    FunctionKw,
358    #[token("hastype")]
359    HastypeKw,
360    #[token("if")]
361    IfKw,
362    #[token("implies")]
363    ImpliesKw,
364    #[token("import")]
365    ImportKw,
366    #[token("in")]
367    InKw,
368    #[token("include")]
369    IncludeKw,
370    #[token("individual")]
371    IndividualKw,
372    #[token("inout")]
373    InoutKw,
374    #[token("interaction")]
375    InteractionKw,
376    #[token("interface")]
377    InterfaceKw,
378    #[token("intersects")]
379    IntersectsKw,
380    #[token("inv")]
381    InvKw,
382    #[token("inverse")]
383    InverseKw,
384    #[token("istype")]
385    IstypeKw,
386    #[token("item")]
387    ItemKw,
388    #[token("join")]
389    JoinKw,
390    #[token("language")]
391    LanguageKw,
392    #[token("library")]
393    LibraryKw,
394    #[token("locale")]
395    LocaleKw,
396    #[token("loop")]
397    LoopKw,
398    #[token("merge")]
399    MergeKw,
400    #[token("member")]
401    MemberKw,
402    #[token("message")]
403    MessageKw,
404    #[token("meta")]
405    MetaKw,
406    #[token("metaclass")]
407    MetaclassKw,
408    #[token("metadata")]
409    MetadataKw,
410    #[token("nonunique")]
411    NonuniqueKw,
412    #[token("not")]
413    NotKw,
414    #[token("new")]
415    NewKw,
416    #[token("null")]
417    NullKw,
418    #[token("objective")]
419    ObjectiveKw,
420    #[token("occurrence")]
421    OccurrenceKw,
422    #[token("of")]
423    OfKw,
424    #[token("or")]
425    OrKw,
426    #[token("ordered")]
427    OrderedKw,
428    #[token("out")]
429    OutKw,
430    #[token("package")]
431    PackageKw,
432    #[token("part")]
433    PartKw,
434    #[token("perform")]
435    PerformKw,
436    #[token("port")]
437    PortKw,
438    #[token("portion")]
439    PortionKw,
440    #[token("predicate")]
441    PredicateKw,
442    #[token("private")]
443    PrivateKw,
444    #[token("protected")]
445    ProtectedKw,
446    #[token("public")]
447    PublicKw,
448    #[token("readonly")]
449    ReadonlyKw,
450    #[token("redefines")]
451    RedefinesKw,
452    #[token("ref")]
453    RefKw,
454    #[token("references")]
455    ReferencesKw,
456    #[token("render")]
457    RenderKw,
458    #[token("rendering")]
459    RenderingKw,
460    #[token("rep")]
461    RepKw,
462    #[token("require")]
463    RequireKw,
464    #[token("requirement")]
465    RequirementKw,
466    #[token("return")]
467    ReturnKw,
468    #[token("satisfy")]
469    SatisfyKw,
470    #[token("send")]
471    SendKw,
472    #[token("specializes")]
473    SpecializesKw,
474    #[token("stakeholder")]
475    StakeholderKw,
476    #[token("standard")]
477    StandardKw,
478    #[token("start")]
479    StartKw,
480    #[token("state")]
481    StateKw,
482    #[token("step")]
483    StepKw,
484    #[token("struct")]
485    StructKw,
486    #[token("snapshot")]
487    SnapshotKw,
488    #[token("subject")]
489    SubjectKw,
490    #[token("subset")]
491    SubsetKw,
492    #[token("subsets")]
493    SubsetsKw,
494    #[token("succession")]
495    SuccessionKw,
496    #[token("terminate")]
497    TerminateKw,
498    #[token("then")]
499    ThenKw,
500    #[token("this")]
501    ThisKw,
502    #[token("timeslice")]
503    TimesliceKw,
504    #[token("to")]
505    ToKw,
506    #[token("transition")]
507    TransitionKw,
508    #[token("true")]
509    TrueKw,
510    #[token("type")]
511    TypeKw,
512    #[token("typed")]
513    TypedKw,
514    #[token("unions")]
515    UnionsKw,
516    #[token("until")]
517    UntilKw,
518    #[token("use")]
519    UseKw,
520    #[token("var")]
521    VarKw,
522    #[token("variant")]
523    VariantKw,
524    #[token("variation")]
525    VariationKw,
526    #[token("verification")]
527    VerificationKw,
528    #[token("verify")]
529    VerifyKw,
530    #[token("via")]
531    ViaKw,
532    #[token("view")]
533    ViewKw,
534    #[token("viewpoint")]
535    ViewpointKw,
536    #[token("when")]
537    WhenKw,
538    #[token("while")]
539    WhileKw,
540    #[token("xor")]
541    XorKw,
542}
543
544impl From<LogosToken> for SyntaxKind {
545    fn from(token: LogosToken) -> Self {
546        use LogosToken::*;
547        match token {
548            // Trivia
549            Whitespace => SyntaxKind::WHITESPACE,
550            LineComment => SyntaxKind::LINE_COMMENT,
551            BlockComment => SyntaxKind::BLOCK_COMMENT,
552
553            // Literals
554            Ident | UnrestrictedName => SyntaxKind::IDENT,
555            Integer => SyntaxKind::INTEGER,
556            Decimal => SyntaxKind::DECIMAL,
557            String => SyntaxKind::STRING,
558
559            // Multi-char punctuation
560            ColonColonGt => SyntaxKind::COLON_COLON_GT,
561            ColonGtGt => SyntaxKind::COLON_GT_GT,
562            ColonGt => SyntaxKind::COLON_GT,
563            ColonColon => SyntaxKind::COLON_COLON,
564            ColonEq => SyntaxKind::COLON_EQ,
565            DotDot => SyntaxKind::DOT_DOT,
566            EqEqEq => SyntaxKind::EQ_EQ_EQ,
567            BangEqEq => SyntaxKind::BANG_EQ_EQ,
568            EqEq => SyntaxKind::EQ_EQ,
569            BangEq => SyntaxKind::BANG_EQ,
570            LtEq => SyntaxKind::LT_EQ,
571            GtEq => SyntaxKind::GT_EQ,
572            Arrow => SyntaxKind::ARROW,
573            FatArrow => SyntaxKind::FAT_ARROW,
574            AtAt => SyntaxKind::AT_AT,
575            StarStar => SyntaxKind::STAR_STAR,
576            QuestionQuestion => SyntaxKind::QUESTION_QUESTION,
577            AmpAmp => SyntaxKind::AMP_AMP,
578            PipePipe => SyntaxKind::PIPE_PIPE,
579
580            // Single-char punctuation
581            LBrace => SyntaxKind::L_BRACE,
582            RBrace => SyntaxKind::R_BRACE,
583            LBracket => SyntaxKind::L_BRACKET,
584            RBracket => SyntaxKind::R_BRACKET,
585            LParen => SyntaxKind::L_PAREN,
586            RParen => SyntaxKind::R_PAREN,
587            Semicolon => SyntaxKind::SEMICOLON,
588            Colon => SyntaxKind::COLON,
589            Dot => SyntaxKind::DOT,
590            Comma => SyntaxKind::COMMA,
591            Eq => SyntaxKind::EQ,
592            Lt => SyntaxKind::LT,
593            Gt => SyntaxKind::GT,
594            At => SyntaxKind::AT,
595            Hash => SyntaxKind::HASH,
596            Star => SyntaxKind::STAR,
597            Plus => SyntaxKind::PLUS,
598            Minus => SyntaxKind::MINUS,
599            Slash => SyntaxKind::SLASH,
600            Percent => SyntaxKind::PERCENT,
601            Caret => SyntaxKind::CARET,
602            Tilde => SyntaxKind::TILDE,
603            Question => SyntaxKind::QUESTION,
604            Bang => SyntaxKind::BANG,
605            Pipe => SyntaxKind::PIPE,
606            Amp => SyntaxKind::AMP,
607
608            // Keywords
609            AboutKw => SyntaxKind::ABOUT_KW,
610            AbstractKw => SyntaxKind::ABSTRACT_KW,
611            AcceptKw => SyntaxKind::ACCEPT_KW,
612            ActionKw => SyntaxKind::ACTION_KW,
613            ActorKw => SyntaxKind::ACTOR_KW,
614            AfterKw => SyntaxKind::AFTER_KW,
615            AliasKw => SyntaxKind::ALIAS_KW,
616            AllKw => SyntaxKind::ALL_KW,
617            AllocationKw => SyntaxKind::ALLOCATION_KW,
618            AllocateKw => SyntaxKind::ALLOCATE_KW,
619            AnalysisKw => SyntaxKind::ANALYSIS_KW,
620            AndKw => SyntaxKind::AND_KW,
621            AsKw => SyntaxKind::AS_KW,
622            AssertKw => SyntaxKind::ASSERT_KW,
623            AssignKw => SyntaxKind::ASSIGN_KW,
624            AssocKw => SyntaxKind::ASSOC_KW,
625            AssumeKw => SyntaxKind::ASSUME_KW,
626            AtKw => SyntaxKind::AT_KW,
627            AttributeKw => SyntaxKind::ATTRIBUTE_KW,
628            BehaviorKw => SyntaxKind::BEHAVIOR_KW,
629            BindKw => SyntaxKind::BIND_KW,
630            BindingKw => SyntaxKind::BINDING_KW,
631            ByKw => SyntaxKind::BY_KW,
632            CalcKw => SyntaxKind::CALC_KW,
633            CaseKw => SyntaxKind::CASE_KW,
634            ChainsKw => SyntaxKind::CHAINS_KW,
635            ClassKw => SyntaxKind::CLASS_KW,
636            ClassifierKw => SyntaxKind::CLASSIFIER_KW,
637            CommentKw => SyntaxKind::COMMENT_KW,
638            CompositeKw => SyntaxKind::COMPOSITE_KW,
639            ConcernKw => SyntaxKind::CONCERN_KW,
640            ConnectKw => SyntaxKind::CONNECT_KW,
641            ConnectionKw => SyntaxKind::CONNECTION_KW,
642            ConnectorKw => SyntaxKind::CONNECTOR_KW,
643            ConstantKw => SyntaxKind::CONSTANT_KW,
644            ConstraintKw => SyntaxKind::CONSTRAINT_KW,
645            ConjugatesKw => SyntaxKind::CONJUGATES_KW,
646            CrossesKw => SyntaxKind::CROSSES_KW,
647            DatatypeKw => SyntaxKind::DATATYPE_KW,
648            DecideKw => SyntaxKind::DECIDE_KW,
649            DefKw => SyntaxKind::DEF_KW,
650            DefaultKw => SyntaxKind::DEFAULT_KW,
651            DefinedKw => SyntaxKind::DEFINED_KW,
652            DependencyKw => SyntaxKind::DEPENDENCY_KW,
653            DerivedKw => SyntaxKind::DERIVED_KW,
654            DiffersKw => SyntaxKind::DIFFERS_KW,
655            DisjointKw => SyntaxKind::DISJOINT_KW,
656            DisjoiningKw => SyntaxKind::DISJOINING_KW,
657            DoKw => SyntaxKind::DO_KW,
658            DocKw => SyntaxKind::DOC_KW,
659            DoneKw => SyntaxKind::DONE_KW,
660            ElseKw => SyntaxKind::ELSE_KW,
661            EndKw => SyntaxKind::END_KW,
662            EntryKw => SyntaxKind::ENTRY_KW,
663            EnumKw => SyntaxKind::ENUM_KW,
664            EnumerationKw => SyntaxKind::ENUMERATION_KW,
665            ExhibitKw => SyntaxKind::EXHIBIT_KW,
666            ExitKw => SyntaxKind::EXIT_KW,
667            ExposeKw => SyntaxKind::EXPOSE_KW,
668            EventKw => SyntaxKind::EVENT_KW,
669            ExprKw => SyntaxKind::EXPR_KW,
670            FalseKw => SyntaxKind::FALSE_KW,
671            FeatureKw => SyntaxKind::FEATURE_KW,
672            FilterKw => SyntaxKind::FILTER_KW,
673            FirstKw => SyntaxKind::FIRST_KW,
674            FlowKw => SyntaxKind::FLOW_KW,
675            ForKw => SyntaxKind::FOR_KW,
676            ForkKw => SyntaxKind::FORK_KW,
677            FrameKw => SyntaxKind::FRAME_KW,
678            FromKw => SyntaxKind::FROM_KW,
679            FunctionKw => SyntaxKind::FUNCTION_KW,
680            HastypeKw => SyntaxKind::HASTYPE_KW,
681            IfKw => SyntaxKind::IF_KW,
682            ImpliesKw => SyntaxKind::IMPLIES_KW,
683            ImportKw => SyntaxKind::IMPORT_KW,
684            InKw => SyntaxKind::IN_KW,
685            IncludeKw => SyntaxKind::INCLUDE_KW,
686            IndividualKw => SyntaxKind::INDIVIDUAL_KW,
687            InoutKw => SyntaxKind::INOUT_KW,
688            InteractionKw => SyntaxKind::INTERACTION_KW,
689            InterfaceKw => SyntaxKind::INTERFACE_KW,
690            IntersectsKw => SyntaxKind::INTERSECTS_KW,
691            InvKw => SyntaxKind::INV_KW,
692            InverseKw => SyntaxKind::INVERSE_KW,
693            IstypeKw => SyntaxKind::ISTYPE_KW,
694            ItemKw => SyntaxKind::ITEM_KW,
695            JoinKw => SyntaxKind::JOIN_KW,
696            LanguageKw => SyntaxKind::LANGUAGE_KW,
697            LibraryKw => SyntaxKind::LIBRARY_KW,
698            LocaleKw => SyntaxKind::LOCALE_KW,
699            LoopKw => SyntaxKind::LOOP_KW,
700            MemberKw => SyntaxKind::MEMBER_KW,
701            MergeKw => SyntaxKind::MERGE_KW,
702            MessageKw => SyntaxKind::MESSAGE_KW,
703            MetaKw => SyntaxKind::META_KW,
704            MetaclassKw => SyntaxKind::METACLASS_KW,
705            MetadataKw => SyntaxKind::METADATA_KW,
706            NonuniqueKw => SyntaxKind::NONUNIQUE_KW,
707            NotKw => SyntaxKind::NOT_KW,
708            NewKw => SyntaxKind::NEW_KW,
709            NullKw => SyntaxKind::NULL_KW,
710            ObjectiveKw => SyntaxKind::OBJECTIVE_KW,
711            OccurrenceKw => SyntaxKind::OCCURRENCE_KW,
712            OfKw => SyntaxKind::OF_KW,
713            OrKw => SyntaxKind::OR_KW,
714            OrderedKw => SyntaxKind::ORDERED_KW,
715            OutKw => SyntaxKind::OUT_KW,
716            PackageKw => SyntaxKind::PACKAGE_KW,
717            PartKw => SyntaxKind::PART_KW,
718            PerformKw => SyntaxKind::PERFORM_KW,
719            PortKw => SyntaxKind::PORT_KW,
720            PortionKw => SyntaxKind::PORTION_KW,
721            PredicateKw => SyntaxKind::PREDICATE_KW,
722            PrivateKw => SyntaxKind::PRIVATE_KW,
723            ProtectedKw => SyntaxKind::PROTECTED_KW,
724            PublicKw => SyntaxKind::PUBLIC_KW,
725            ReadonlyKw => SyntaxKind::READONLY_KW,
726            RedefinesKw => SyntaxKind::REDEFINES_KW,
727            RefKw => SyntaxKind::REF_KW,
728            ReferencesKw => SyntaxKind::REFERENCES_KW,
729            RenderKw => SyntaxKind::RENDER_KW,
730            RenderingKw => SyntaxKind::RENDERING_KW,
731            RepKw => SyntaxKind::REP_KW,
732            RequireKw => SyntaxKind::REQUIRE_KW,
733            RequirementKw => SyntaxKind::REQUIREMENT_KW,
734            ReturnKw => SyntaxKind::RETURN_KW,
735            SatisfyKw => SyntaxKind::SATISFY_KW,
736            SendKw => SyntaxKind::SEND_KW,
737            SpecializesKw => SyntaxKind::SPECIALIZES_KW,
738            StakeholderKw => SyntaxKind::STAKEHOLDER_KW,
739            StandardKw => SyntaxKind::STANDARD_KW,
740            StartKw => SyntaxKind::START_KW,
741            StateKw => SyntaxKind::STATE_KW,
742            StepKw => SyntaxKind::STEP_KW,
743            StructKw => SyntaxKind::STRUCT_KW,
744            SnapshotKw => SyntaxKind::SNAPSHOT_KW,
745            SubjectKw => SyntaxKind::SUBJECT_KW,
746            SubsetKw => SyntaxKind::SUBSET_KW,
747            SubsetsKw => SyntaxKind::SUBSETS_KW,
748            SuccessionKw => SyntaxKind::SUCCESSION_KW,
749            TerminateKw => SyntaxKind::TERMINATE_KW,
750            ThenKw => SyntaxKind::THEN_KW,
751            ThisKw => SyntaxKind::THIS_KW,
752            TimesliceKw => SyntaxKind::TIMESLICE_KW,
753            ToKw => SyntaxKind::TO_KW,
754            TransitionKw => SyntaxKind::TRANSITION_KW,
755            TrueKw => SyntaxKind::TRUE_KW,
756            TypeKw => SyntaxKind::TYPE_KW,
757            TypedKw => SyntaxKind::TYPED_KW,
758            UnionsKw => SyntaxKind::UNIONS_KW,
759            UntilKw => SyntaxKind::UNTIL_KW,
760            UseKw => SyntaxKind::USE_KW,
761            VarKw => SyntaxKind::VAR_KW,
762            VariantKw => SyntaxKind::VARIANT_KW,
763            VariationKw => SyntaxKind::VARIATION_KW,
764            VerificationKw => SyntaxKind::VERIFICATION_KW,
765            VerifyKw => SyntaxKind::VERIFY_KW,
766            ViaKw => SyntaxKind::VIA_KW,
767            ViewKw => SyntaxKind::VIEW_KW,
768            ViewpointKw => SyntaxKind::VIEWPOINT_KW,
769            WhenKw => SyntaxKind::WHEN_KW,
770            WhileKw => SyntaxKind::WHILE_KW,
771            XorKw => SyntaxKind::XOR_KW,
772        }
773    }
774}
775
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `source` and returns just the kind of every token, in order.
    fn kinds_of(source: &str) -> Vec<SyntaxKind> {
        Lexer::new(source).map(|token| token.kind).collect()
    }

    #[test]
    fn test_lex_package() {
        let kinds = kinds_of("package Test;");
        // package, whitespace, Test, ;
        assert_eq!(kinds.len(), 4);
        assert_eq!(
            kinds,
            [
                SyntaxKind::PACKAGE_KW,
                SyntaxKind::WHITESPACE,
                SyntaxKind::IDENT,
                SyntaxKind::SEMICOLON,
            ]
        );
    }

    #[test]
    fn test_lex_qualified_name() {
        let kinds = kinds_of("A::B::C");
        // Only the first five tokens are pinned down here.
        assert_eq!(
            kinds[..5],
            [
                SyntaxKind::IDENT,
                SyntaxKind::COLON_COLON,
                SyntaxKind::IDENT,
                SyntaxKind::COLON_COLON,
                SyntaxKind::IDENT,
            ]
        );
    }

    #[test]
    fn test_lex_specializes() {
        let kinds = kinds_of("part def A :> B;");
        let expected = [
            SyntaxKind::PART_KW,
            SyntaxKind::DEF_KW,
            SyntaxKind::COLON_GT,
        ];
        for kind in &expected {
            assert!(kinds.contains(kind));
        }
    }

    #[test]
    fn test_lex_comment() {
        let kinds = kinds_of("// comment\npackage");
        // The line comment stops before the newline, which lexes as whitespace.
        assert_eq!(
            kinds[..3],
            [
                SyntaxKind::LINE_COMMENT,
                SyntaxKind::WHITESPACE,
                SyntaxKind::PACKAGE_KW,
            ]
        );
    }

    #[test]
    fn test_lex_import_wildcard() {
        let kinds = kinds_of("import ISQ::*;");
        let expected = [
            SyntaxKind::IMPORT_KW,
            SyntaxKind::COLON_COLON,
            SyntaxKind::STAR,
            SyntaxKind::SEMICOLON,
        ];
        for kind in &expected {
            assert!(kinds.contains(kind));
        }
    }
}