1use perl_error::{ParseError, ParseResult};
23use perl_lexer::{LexerMode, PerlLexer, Token as LexerToken, TokenType as LexerTokenType};
24pub use perl_token::{Token, TokenKind};
25
/// Parser-facing token stream over the Perl lexer.
///
/// Wraps a `PerlLexer` and adds up to three tokens of buffered lookahead
/// (`peeked` → `peeked_second` → `peeked_third`). Trivia (whitespace,
/// newlines, comments) is filtered out before tokens ever reach the
/// buffer, so the parser only observes significant tokens.
pub struct TokenStream<'a> {
    /// Underlying lexer producing raw tokens from the input text.
    lexer: PerlLexer<'a>,
    /// First lookahead slot; `None` means nothing has been peeked yet.
    peeked: Option<Token>,
    /// Second lookahead slot; only ever `Some` while `peeked` is `Some`
    /// (filled via `peek_second`, which fills `peeked` first).
    peeked_second: Option<Token>,
    /// Third lookahead slot; only ever `Some` while `peeked_second` is
    /// `Some` (filled via `peek_third`).
    peeked_third: Option<Token>,
}
33
impl<'a> TokenStream<'a> {
    /// Creates a token stream over `input` with an empty lookahead buffer.
    pub fn new(input: &'a str) -> Self {
        TokenStream {
            lexer: PerlLexer::new(input),
            peeked: None,
            peeked_second: None,
            peeked_third: None,
        }
    }

    /// Returns a reference to the next significant token without consuming
    /// it, lexing one token into the first lookahead slot on demand.
    pub fn peek(&mut self) -> ParseResult<&Token> {
        if self.peeked.is_none() {
            self.peeked = Some(self.next_token()?);
        }
        // `peeked` is necessarily `Some` at this point; the `ok_or` exists
        // only to turn the `Option` borrow into a `ParseResult` without an
        // `unwrap`.
        self.peeked.as_ref().ok_or(ParseError::UnexpectedEof)
    }

    /// Consumes and returns the next significant token.
    ///
    /// Eof is sticky: once produced, it is re-cached in the first lookahead
    /// slot so every later call keeps returning Eof instead of querying the
    /// lexer again. For any other token the lookahead buffer shifts down one
    /// slot.
    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> ParseResult<Token> {
        if let Some(token) = self.peeked.take() {
            if token.kind == TokenKind::Eof {
                // Re-cache Eof so the stream keeps reporting end-of-input.
                self.peeked = Some(token.clone());
            } else {
                // Shift lookahead down: second -> first, third -> second.
                self.peeked = self.peeked_second.take();
                self.peeked_second = self.peeked_third.take();
            }
            Ok(token)
        } else {
            let token = self.next_token()?;
            if token.kind == TokenKind::Eof {
                // Same sticky-Eof caching as above.
                self.peeked = Some(token.clone());
            }
            Ok(token)
        }
    }

    /// True when the next significant token is Eof.
    /// A peek error is reported as "not at Eof".
    pub fn is_eof(&mut self) -> bool {
        matches!(self.peek(), Ok(token) if token.kind == TokenKind::Eof)
    }

    /// Returns the second token of lookahead without consuming anything.
    pub fn peek_second(&mut self) -> ParseResult<&Token> {
        // Fill the first slot first so buffer order matches lexer order.
        self.peek()?;

        if self.peeked_second.is_none() {
            self.peeked_second = Some(self.next_token()?);
        }

        self.peeked_second.as_ref().ok_or(ParseError::UnexpectedEof)
    }

    /// Returns the third token of lookahead without consuming anything.
    pub fn peek_third(&mut self) -> ParseResult<&Token> {
        // Fill the first two slots first so buffer order matches lexer order.
        self.peek_second()?;

        if self.peeked_third.is_none() {
            self.peeked_third = Some(self.next_token()?);
        }

        self.peeked_third.as_ref().ok_or(ParseError::UnexpectedEof)
    }

    /// Switches the underlying lexer into format-body mode (delegates).
    pub fn enter_format_mode(&mut self) {
        self.lexer.enter_format_mode();
    }

    /// Called at a statement boundary: drops all buffered lookahead and puts
    /// the lexer into term-expecting mode for the next statement.
    ///
    /// NOTE(review): unlike `relex_as_term`, this does not rewind the lexer,
    /// so any tokens already buffered here are discarded rather than
    /// re-lexed — confirm callers only invoke this when the lookahead is
    /// logically stale (e.g. nothing buffered past the boundary).
    pub fn on_stmt_boundary(&mut self) {
        self.peeked = None;
        self.peeked_second = None;
        self.peeked_third = None;

        self.lexer.set_mode(LexerMode::ExpectTerm);
    }

    /// Rewinds the lexer to the start of the first buffered token and clears
    /// the lookahead, so the same source text is re-lexed from scratch on
    /// the next `peek`/`next` (presumably to re-interpret an ambiguous token
    /// as a term — TODO confirm against callers).
    pub fn relex_as_term(&mut self) {
        if let Some(ref token) = self.peeked {
            // `Checkpointable` brings `restore` into scope for the lexer.
            use perl_lexer::Checkpointable;
            let pos = token.start;
            let cp = perl_lexer::LexerCheckpoint::at_position(pos);
            self.lexer.restore(&cp);
        }
        self.peeked = None;
        self.peeked_second = None;
        self.peeked_third = None;
    }

    /// Clears the lookahead buffer without touching the lexer position.
    /// Tokens lexed after this come from wherever the lexer currently is.
    pub fn invalidate_peek(&mut self) {
        self.peeked = None;
        self.peeked_third = None;
        self.peeked_second = None;
    }

    /// Drops any buffered lookahead and returns the kind of the token the
    /// lexer produces from its current position, or `None` if lexing fails.
    pub fn peek_fresh_kind(&mut self) -> Option<TokenKind> {
        self.invalidate_peek();
        match self.peek() {
            Ok(token) => Some(token.kind),
            Err(_) => None,
        }
    }

    /// Pulls tokens from the lexer, skipping trivia (whitespace, newlines,
    /// comments), and converts the first significant one into a parser
    /// `Token`.
    ///
    /// A lexer `EOF` becomes a `TokenKind::Eof` token with empty text;
    /// lexer exhaustion (`None`) is reported as `ParseError::UnexpectedEof`.
    fn next_token(&mut self) -> ParseResult<Token> {
        loop {
            let lexer_token = self.lexer.next_token().ok_or(ParseError::UnexpectedEof)?;

            match &lexer_token.token_type {
                LexerTokenType::Whitespace | LexerTokenType::Newline => continue,
                LexerTokenType::Comment(_) => continue,
                LexerTokenType::EOF => {
                    return Ok(Token {
                        kind: TokenKind::Eof,
                        text: String::new().into(),
                        start: lexer_token.start,
                        end: lexer_token.end,
                    });
                }
                _ => {
                    return Ok(self.convert_token(lexer_token));
                }
            }
        }
    }

    /// Maps a raw lexer token to the parser-level `TokenKind`, preserving
    /// the token's text and source span unchanged.
    fn convert_token(&self, token: LexerToken) -> Token {
        let kind = match &token.token_type {
            // Keywords: anything the lexer classified as a keyword that the
            // parser has no dedicated kind for falls back to `Identifier`.
            LexerTokenType::Keyword(kw) => match kw.as_ref() {
                "my" => TokenKind::My,
                "our" => TokenKind::Our,
                "local" => TokenKind::Local,
                "state" => TokenKind::State,
                "sub" => TokenKind::Sub,
                "if" => TokenKind::If,
                "elsif" => TokenKind::Elsif,
                "else" => TokenKind::Else,
                "unless" => TokenKind::Unless,
                "while" => TokenKind::While,
                "until" => TokenKind::Until,
                "for" => TokenKind::For,
                "foreach" => TokenKind::Foreach,
                "return" => TokenKind::Return,
                "package" => TokenKind::Package,
                "use" => TokenKind::Use,
                "no" => TokenKind::No,
                "BEGIN" => TokenKind::Begin,
                "END" => TokenKind::End,
                "CHECK" => TokenKind::Check,
                "INIT" => TokenKind::Init,
                "UNITCHECK" => TokenKind::Unitcheck,
                "eval" => TokenKind::Eval,
                "do" => TokenKind::Do,
                "given" => TokenKind::Given,
                "when" => TokenKind::When,
                "default" => TokenKind::Default,
                "try" => TokenKind::Try,
                "catch" => TokenKind::Catch,
                "field" => TokenKind::Field,
                "finally" => TokenKind::Finally,
                "continue" => TokenKind::Continue,
                "next" => TokenKind::Next,
                "last" => TokenKind::Last,
                "redo" => TokenKind::Redo,
                "goto" => TokenKind::Goto,
                "class" => TokenKind::Class,
                "method" => TokenKind::Method,
                "format" => TokenKind::Format,
                "undef" => TokenKind::Undef,
                "and" => TokenKind::WordAnd,
                "or" => TokenKind::WordOr,
                "not" => TokenKind::WordNot,
                "xor" => TokenKind::WordXor,
                "cmp" => TokenKind::StringCompare,
                // Redundant with the fallback arm (same result), but kept
                // explicit — presumably to record that `qw` is deliberately
                // not given a keyword kind here.
                "qw" => TokenKind::Identifier,
                _ => TokenKind::Identifier,
            },

            // Operators: unrecognized operator spellings map to `Unknown`.
            LexerTokenType::Operator(op) => match op.as_ref() {
                "=" => TokenKind::Assign,
                "+" => TokenKind::Plus,
                "-" => TokenKind::Minus,
                "*" => TokenKind::Star,
                "/" => TokenKind::Slash,
                "%" => TokenKind::Percent,
                "**" => TokenKind::Power,
                "<<" => TokenKind::LeftShift,
                ">>" => TokenKind::RightShift,
                "&" => TokenKind::BitwiseAnd,
                "|" => TokenKind::BitwiseOr,
                "^" => TokenKind::BitwiseXor,
                "~" => TokenKind::BitwiseNot,
                "+=" => TokenKind::PlusAssign,
                "-=" => TokenKind::MinusAssign,
                "*=" => TokenKind::StarAssign,
                "/=" => TokenKind::SlashAssign,
                "%=" => TokenKind::PercentAssign,
                ".=" => TokenKind::DotAssign,
                "&=" => TokenKind::AndAssign,
                "|=" => TokenKind::OrAssign,
                "^=" => TokenKind::XorAssign,
                "**=" => TokenKind::PowerAssign,
                "<<=" => TokenKind::LeftShiftAssign,
                ">>=" => TokenKind::RightShiftAssign,
                "&&=" => TokenKind::LogicalAndAssign,
                "||=" => TokenKind::LogicalOrAssign,
                "//=" => TokenKind::DefinedOrAssign,
                "==" => TokenKind::Equal,
                "!=" => TokenKind::NotEqual,
                "=~" => TokenKind::Match,
                "!~" => TokenKind::NotMatch,
                "~~" => TokenKind::SmartMatch,
                "<" => TokenKind::Less,
                ">" => TokenKind::Greater,
                "<=" => TokenKind::LessEqual,
                ">=" => TokenKind::GreaterEqual,
                "<=>" => TokenKind::Spaceship,
                "&&" => TokenKind::And,
                "||" => TokenKind::Or,
                "!" => TokenKind::Not,
                "//" => TokenKind::DefinedOr,
                "->" => TokenKind::Arrow,
                "=>" => TokenKind::FatArrow,
                "." => TokenKind::Dot,
                ".." => TokenKind::Range,
                "..." => TokenKind::Ellipsis,
                "++" => TokenKind::Increment,
                "--" => TokenKind::Decrement,
                "::" => TokenKind::DoubleColon,
                "?" => TokenKind::Question,
                ":" => TokenKind::Colon,
                "\\" => TokenKind::Backslash,
                "$" => TokenKind::ScalarSigil,
                "@" => TokenKind::ArraySigil,
                _ => TokenKind::Unknown,
            },

            // Arrow / fat comma have dedicated lexer variants as well as
            // operator spellings above; both map to the same parser kinds.
            LexerTokenType::Arrow => TokenKind::Arrow,
            LexerTokenType::FatComma => TokenKind::FatArrow,

            // Structural punctuation.
            LexerTokenType::LeftParen => TokenKind::LeftParen,
            LexerTokenType::RightParen => TokenKind::RightParen,
            LexerTokenType::LeftBrace => TokenKind::LeftBrace,
            LexerTokenType::RightBrace => TokenKind::RightBrace,
            LexerTokenType::LeftBracket => TokenKind::LeftBracket,
            LexerTokenType::RightBracket => TokenKind::RightBracket,
            LexerTokenType::Semicolon => TokenKind::Semicolon,
            LexerTokenType::Comma => TokenKind::Comma,

            // A lexer-disambiguated division collapses to plain `Slash`.
            LexerTokenType::Division => TokenKind::Slash,

            // Literals, quote-like constructs, heredocs, and trailing data.
            LexerTokenType::Number(_) => TokenKind::Number,
            LexerTokenType::StringLiteral | LexerTokenType::InterpolatedString(_) => {
                TokenKind::String
            }
            LexerTokenType::RegexMatch | LexerTokenType::QuoteRegex => TokenKind::Regex,
            LexerTokenType::Substitution => TokenKind::Substitution,
            LexerTokenType::Transliteration => TokenKind::Transliteration,
            LexerTokenType::QuoteSingle => TokenKind::QuoteSingle,
            LexerTokenType::QuoteDouble => TokenKind::QuoteDouble,
            LexerTokenType::QuoteWords => TokenKind::QuoteWords,
            LexerTokenType::QuoteCommand => TokenKind::QuoteCommand,
            LexerTokenType::HeredocStart => TokenKind::HeredocStart,
            LexerTokenType::HeredocBody(_) => TokenKind::HeredocBody,
            LexerTokenType::FormatBody(_) => TokenKind::FormatBody,
            LexerTokenType::Version(_) => TokenKind::VString,
            LexerTokenType::DataMarker(_) => TokenKind::DataMarker,
            LexerTokenType::DataBody(_) => TokenKind::DataBody,
            LexerTokenType::UnknownRest => TokenKind::UnknownRest,

            // Identifiers: a few punctuation texts can arrive classified as
            // identifiers (presumably depending on lexer mode — TODO confirm);
            // map those back to their symbolic kinds.
            LexerTokenType::Identifier(text) => {
                match text.as_ref() {
                    "no" => TokenKind::No,
                    "*" => TokenKind::Star,
                    "$" => TokenKind::ScalarSigil,
                    "@" => TokenKind::ArraySigil,
                    "%" => TokenKind::HashSigil,
                    "&" => TokenKind::SubSigil,
                    _ => TokenKind::Identifier,
                }
            }

            // Lexer errors: the heredoc depth limit gets a dedicated kind so
            // the parser can surface it; brace errors are recovered as brace
            // tokens; everything else becomes `Unknown`.
            LexerTokenType::Error(msg) => {
                if msg.as_ref() == "Heredoc nesting too deep" {
                    TokenKind::HeredocDepthLimit
                } else {
                    match token.text.as_ref() {
                        "{" => TokenKind::LeftBrace,
                        "}" => TokenKind::RightBrace,
                        _ => TokenKind::Unknown,
                    }
                }
            }

            _ => TokenKind::Unknown,
        };

        Token { kind, text: token.text, start: token.start, end: token.end }
    }
}