awk_rs/lexer/
tokens.rs

1use crate::error::SourceLocation;
2
/// All token types in AWK.
///
/// Literal and identifier variants carry their text or value as a payload;
/// every other variant is a bare tag for a keyword, operator or delimiter.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
    // Literals
    Number(f64),
    String(String),
    Regex(String),

    // Identifiers and keywords
    Identifier(String),
    Begin,
    End,
    BeginFile, // gawk extension
    EndFile,   // gawk extension
    If,
    Else,
    While,
    For,
    Do,
    Break,
    Continue,
    Function,
    Return,
    Delete,
    Exit,
    Next,
    Nextfile,
    Getline,
    Print,
    Printf,
    In,

    // Operators - Arithmetic
    Plus,    // +
    Minus,   // -
    Star,    // *
    Slash,   // /
    Percent, // %
    Caret,   // ^

    // Operators - Comparison
    Less,         // <
    LessEqual,    // <=
    Greater,      // >
    GreaterEqual, // >=
    Equal,        // ==
    NotEqual,     // !=

    // Operators - Logical
    And, // &&
    Or,  // ||
    Not, // !

    // Operators - Regex
    Match,    // ~
    NotMatch, // !~

    // Operators - Assignment
    Assign,        // =
    PlusAssign,    // +=
    MinusAssign,   // -=
    StarAssign,    // *=
    SlashAssign,   // /=
    PercentAssign, // %=
    CaretAssign,   // ^=

    // Operators - Increment/Decrement
    Increment, // ++
    Decrement, // --

    // Special operators
    Dollar,   // $ (field access)
    Question, // ?
    Colon,    // :
    Pipe,     // |
    Append,   // >>

    // Delimiters
    LeftParen,    // (
    RightParen,   // )
    LeftBrace,    // {
    RightBrace,   // }
    LeftBracket,  // [
    RightBracket, // ]
    Semicolon,    // ;
    Comma,        // ,
    Newline,      // \n (significant in AWK)

    // End of file
    Eof,
}

impl TokenKind {
    /// Check if this token is a keyword.
    ///
    /// Returns `true` for every reserved-word variant, including the gawk
    /// extensions `BeginFile` and `EndFile`, so that this predicate agrees
    /// with the set of variants `keyword_to_token` can produce.
    pub fn is_keyword(&self) -> bool {
        matches!(
            self,
            TokenKind::Begin
                | TokenKind::End
                | TokenKind::BeginFile
                | TokenKind::EndFile
                | TokenKind::If
                | TokenKind::Else
                | TokenKind::While
                | TokenKind::For
                | TokenKind::Do
                | TokenKind::Break
                | TokenKind::Continue
                | TokenKind::Function
                | TokenKind::Return
                | TokenKind::Delete
                | TokenKind::Exit
                | TokenKind::Next
                | TokenKind::Nextfile
                | TokenKind::Getline
                | TokenKind::Print
                | TokenKind::Printf
                | TokenKind::In
        )
    }

    /// Check if this token can start an expression.
    ///
    /// Covers literals, identifiers, `(`, `$`, the prefix operators
    /// `!`, `+`, `-`, `++`, `--`, and `getline`.
    pub fn can_start_expression(&self) -> bool {
        matches!(
            self,
            TokenKind::Number(_)
                | TokenKind::String(_)
                | TokenKind::Regex(_)
                | TokenKind::Identifier(_)
                | TokenKind::LeftParen
                | TokenKind::Dollar
                | TokenKind::Not
                | TokenKind::Plus
                | TokenKind::Minus
                | TokenKind::Increment
                | TokenKind::Decrement
                | TokenKind::Getline
        )
    }

    /// Check if this token produces a value (for regex vs division disambiguation).
    ///
    /// Returns `true` for number and string literals, identifiers, `)`, `]`,
    /// `++` and `--`; every other token kind returns `false`.
    pub fn produces_value(&self) -> bool {
        matches!(
            self,
            TokenKind::Number(_)
                | TokenKind::String(_)
                | TokenKind::Identifier(_)
                | TokenKind::RightParen
                | TokenKind::RightBracket
                | TokenKind::Increment
                | TokenKind::Decrement
        )
    }
}
155
156/// A token with its location in the source
157#[derive(Debug, Clone)]
158pub struct Token {
159    pub kind: TokenKind,
160    pub location: SourceLocation,
161}
162
163impl Token {
164    pub fn new(kind: TokenKind, line: usize, column: usize) -> Self {
165        Self {
166            kind,
167            location: SourceLocation::new(line, column),
168        }
169    }
170}
171
172/// Map keyword strings to token kinds
173pub fn keyword_to_token(s: &str) -> Option<TokenKind> {
174    match s {
175        "BEGIN" => Some(TokenKind::Begin),
176        "END" => Some(TokenKind::End),
177        "BEGINFILE" => Some(TokenKind::BeginFile),
178        "ENDFILE" => Some(TokenKind::EndFile),
179        "if" => Some(TokenKind::If),
180        "else" => Some(TokenKind::Else),
181        "while" => Some(TokenKind::While),
182        "for" => Some(TokenKind::For),
183        "do" => Some(TokenKind::Do),
184        "break" => Some(TokenKind::Break),
185        "continue" => Some(TokenKind::Continue),
186        "function" => Some(TokenKind::Function),
187        "return" => Some(TokenKind::Return),
188        "delete" => Some(TokenKind::Delete),
189        "exit" => Some(TokenKind::Exit),
190        "next" => Some(TokenKind::Next),
191        "nextfile" => Some(TokenKind::Nextfile),
192        "getline" => Some(TokenKind::Getline),
193        "print" => Some(TokenKind::Print),
194        "printf" => Some(TokenKind::Printf),
195        "in" => Some(TokenKind::In),
196        _ => None,
197    }
198}