use std::{ops::Range, sync::Arc};
/// Half-open offset range `start..end` locating a token in its source text.
///
/// The fields are public and [`TokenSpan::new`] does not validate them, so a
/// span with `end < start` can exist; [`TokenSpan::try_new`] is the validating
/// constructor. `Hash` is derived alongside `Eq` so spans can be used as keys
/// in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct TokenSpan {
    /// Offset of the first unit covered by the span.
    pub start: usize,
    /// Offset one past the last unit covered by the span.
    pub end: usize,
}
impl TokenSpan {
pub const fn new(start: usize, end: usize) -> Self {
Self { start, end }
}
pub fn try_new(start: usize, end: usize) -> Result<Self, TokenSpanError> {
if end < start {
return Err(TokenSpanError::EndBeforeStart { start, end });
}
Ok(Self { start, end })
}
pub const fn len(self) -> usize {
self.end.saturating_sub(self.start)
}
pub const fn is_empty(self) -> bool {
self.len() == 0
}
pub const fn range(self) -> Range<usize> {
self.start..self.end
}
}
/// Reasons a span is rejected by the validating token/span constructors.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenSpanError {
/// The requested `end` offset is smaller than `start`.
EndBeforeStart { start: usize, end: usize },
/// A zero-length span was requested for a `kind` that is not allowed to be
/// empty; `at` is the start offset of the rejected token.
EmptySpanNotAllowed { kind: TokenKind, at: usize },
}
impl std::fmt::Display for TokenSpanError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::EndBeforeStart { start, end } => {
write!(f, "token span invariant violated: end ({end}) < start ({start})")
}
Self::EmptySpanNotAllowed { kind, at } => {
write!(f, "empty span not allowed for token kind {kind:?} at byte {at}")
}
}
}
}
// Empty impl: relies entirely on the trait's default methods, so no underlying
// source error is reported.
impl std::error::Error for TokenSpanError {}
/// Borrowed, `Copy` view of a token: its kind, a `&str` of its text, and its
/// start/end offsets. The owned counterpart is `Token`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TokenRef<'src> {
/// Classification of the token.
pub kind: TokenKind,
/// Token text, borrowed for the `'src` lifetime.
pub text: &'src str,
/// Offset where the token begins.
pub start: usize,
/// Offset just past the token's end.
pub end: usize,
}
impl<'src> TokenRef<'src> {
pub fn new(kind: TokenKind, text: &'src str, start: usize, end: usize) -> Self {
Self { kind, text, start, end }
}
pub fn try_new(
kind: TokenKind,
text: &'src str,
start: usize,
end: usize,
) -> Result<Self, TokenSpanError> {
let span = TokenSpan::try_new(start, end)?;
Ok(Self { kind, text, start: span.start, end: span.end })
}
pub fn new_checked(
kind: TokenKind,
text: &'src str,
start: usize,
end: usize,
) -> Result<Self, TokenSpanError> {
let token = Self::try_new(kind, text, start, end)?;
if token.is_empty() && !matches!(token.kind, TokenKind::Eof | TokenKind::Unknown) {
return Err(TokenSpanError::EmptySpanNotAllowed { kind: token.kind, at: token.start });
}
Ok(token)
}
pub fn len(self) -> usize {
self.end.saturating_sub(self.start)
}
pub fn is_empty(self) -> bool {
self.len() == 0
}
pub fn span(self) -> (usize, usize) {
(self.start, self.end)
}
pub fn display_name(self) -> &'static str {
self.kind.display_name()
}
pub fn to_owned_token(self) -> Token {
Token::new(self.kind, self.text, self.start, self.end)
}
}
#[derive(Debug, Clone, PartialEq)]
pub struct Token {
pub kind: TokenKind,
pub text: Arc<str>,
pub start: usize,
pub end: usize,
}
impl Token {
pub fn new(kind: TokenKind, text: impl Into<Arc<str>>, start: usize, end: usize) -> Self {
Token { kind, text: text.into(), start, end }
}
pub fn try_new(
kind: TokenKind,
text: impl Into<Arc<str>>,
start: usize,
end: usize,
) -> Result<Self, TokenSpanError> {
let span = TokenSpan::try_new(start, end)?;
Ok(Self { kind, text: text.into(), start: span.start, end: span.end })
}
pub fn new_checked(
kind: TokenKind,
text: impl Into<Arc<str>>,
start: usize,
end: usize,
) -> Result<Self, TokenSpanError> {
let token = Self::try_new(kind, text, start, end)?;
if token.is_empty() && !matches!(token.kind, TokenKind::Eof | TokenKind::Unknown) {
return Err(TokenSpanError::EmptySpanNotAllowed { kind: token.kind, at: token.start });
}
Ok(token)
}
pub fn eof_at(pos: usize) -> Self {
Self::new(TokenKind::Eof, "", pos, pos)
}
pub fn unknown_at(text: impl Into<Arc<str>>, start: usize, end: usize) -> Self {
let bounded_end = end.max(start);
Self::new(TokenKind::Unknown, text, start, bounded_end)
}
pub fn span(&self) -> TokenSpan {
TokenSpan::new(self.start, self.end)
}
pub fn range(&self) -> Range<usize> {
self.span().range()
}
pub fn with_span(&self, start: usize, end: usize) -> Result<Self, TokenSpanError> {
Self::new_checked(self.kind, self.text.clone(), start, end)
}
pub fn with_kind(&self, kind: TokenKind) -> Self {
Self::new(kind, self.text.clone(), self.start, self.end)
}
pub fn len(&self) -> usize {
self.end.saturating_sub(self.start)
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn display_name(&self) -> &'static str {
self.kind.display_name()
}
pub fn as_ref_token(&self) -> TokenRef<'_> {
TokenRef { kind: self.kind, text: self.text.as_ref(), start: self.start, end: self.end }
}
}
impl From<TokenRef<'_>> for Token {
fn from(value: TokenRef<'_>) -> Self {
value.to_owned_token()
}
}
/// Every kind of token the lexer can produce.
///
/// Variants are grouped in the same order as `TokenKind::category` classifies
/// them. `Hash` is derived alongside `Eq` so kinds can be used as keys in
/// hash-based lookup tables.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenKind {
    // Keywords.
    My,
    Our,
    Local,
    State,
    Sub,
    If,
    Elsif,
    Else,
    Unless,
    While,
    Until,
    For,
    Foreach,
    Return,
    Package,
    Use,
    No,
    Begin,
    End,
    Check,
    Init,
    Unitcheck,
    Eval,
    Do,
    Given,
    When,
    Default,
    Try,
    Catch,
    Finally,
    Continue,
    Next,
    Last,
    Redo,
    Goto,
    Class,
    Method,
    Field,
    Format,
    Undef,
    Defer,

    // Operators: arithmetic, shift, and bitwise.
    Assign,
    Plus,
    Minus,
    Star,
    Slash,
    Percent,
    Power,
    LeftShift,
    RightShift,
    BitwiseAnd,
    BitwiseOr,
    BitwiseXor,
    BitwiseNot,

    // Operators: compound assignment.
    PlusAssign,
    MinusAssign,
    StarAssign,
    SlashAssign,
    PercentAssign,
    DotAssign,
    AndAssign,
    OrAssign,
    XorAssign,
    PowerAssign,
    LeftShiftAssign,
    RightShiftAssign,
    LogicalAndAssign,
    LogicalOrAssign,
    DefinedOrAssign,

    // Operators: comparison and binding.
    Equal,
    NotEqual,
    Match,
    NotMatch,
    SmartMatch,
    Less,
    Greater,
    LessEqual,
    GreaterEqual,
    Spaceship,
    StringCompare,

    // Operators: logical, symbolic and word forms.
    And,
    Or,
    Not,
    DefinedOr,
    WordAnd,
    WordOr,
    WordNot,
    WordXor,

    // Operators: miscellaneous.
    Arrow,
    FatArrow,
    Dot,
    Range,
    Ellipsis,
    Increment,
    Decrement,
    DoubleColon,
    Question,
    Colon,
    Backslash,

    // Delimiters.
    LeftParen,
    RightParen,
    LeftBrace,
    RightBrace,
    LeftBracket,
    RightBracket,
    Semicolon,
    Comma,

    // Literals and quote-like constructs.
    Number,
    String,
    Regex,
    Substitution,
    Transliteration,
    QuoteSingle,
    QuoteDouble,
    QuoteWords,
    QuoteCommand,
    HeredocStart,
    HeredocBody,
    FormatBody,
    DataMarker,
    DataBody,
    VString,
    UnknownRest,
    HeredocDepthLimit,

    // Identifiers and sigils.
    Identifier,
    ScalarSigil,
    ArraySigil,
    HashSigil,
    SubSigil,
    GlobSigil,

    // Special.
    Eof,
    Unknown,
}
/// Coarse classification of a `TokenKind`.
///
/// `Hash` is derived alongside `Eq` so categories can be used as keys in
/// hash-based collections (for example, to bucket tokens by category).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenCategory {
    /// Reserved words.
    Keyword,
    /// Symbolic and word operators.
    Operator,
    /// Brackets, semicolon, and comma.
    Delimiter,
    /// Numbers, strings, quote-like constructs, and body/data sections.
    Literal,
    /// Plain identifiers and sigils.
    Identifier,
    /// End-of-input and unknown tokens.
    Special,
}
/// Static description of a `TokenKind`: its category and its display name,
/// as returned by `TokenKind::metadata`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TokenKindMetadata {
/// Category the kind belongs to.
pub category: TokenCategory,
/// Human-readable name of the kind.
pub display_name: &'static str,
}
impl TokenKind {
    /// All token kinds, in the order stored in `TOKEN_KIND_ALL`.
    pub const fn all() -> &'static [TokenKind] {
        &TOKEN_KIND_ALL
    }

    /// Number of entries in `TOKEN_KIND_ALL`.
    pub const fn metadata_count() -> usize {
        TOKEN_KIND_ALL.len()
    }

    /// Category and display name of this kind, bundled together.
    pub const fn metadata(self) -> TokenKindMetadata {
        TokenKindMetadata { category: self.category(), display_name: self.display_name() }
    }

    /// Coarse category of this kind.
    ///
    /// The match is exhaustive with no wildcard arm, so adding a variant
    /// without classifying it is a compile error.
    pub const fn category(self) -> TokenCategory {
        match self {
            TokenKind::My
            | TokenKind::Our
            | TokenKind::Local
            | TokenKind::State
            | TokenKind::Sub
            | TokenKind::If
            | TokenKind::Elsif
            | TokenKind::Else
            | TokenKind::Unless
            | TokenKind::While
            | TokenKind::Until
            | TokenKind::For
            | TokenKind::Foreach
            | TokenKind::Return
            | TokenKind::Package
            | TokenKind::Use
            | TokenKind::No
            | TokenKind::Begin
            | TokenKind::End
            | TokenKind::Check
            | TokenKind::Init
            | TokenKind::Unitcheck
            | TokenKind::Eval
            | TokenKind::Do
            | TokenKind::Given
            | TokenKind::When
            | TokenKind::Default
            | TokenKind::Try
            | TokenKind::Catch
            | TokenKind::Finally
            | TokenKind::Continue
            | TokenKind::Next
            | TokenKind::Last
            | TokenKind::Redo
            | TokenKind::Goto
            | TokenKind::Class
            | TokenKind::Method
            | TokenKind::Field
            | TokenKind::Format
            | TokenKind::Undef
            | TokenKind::Defer => TokenCategory::Keyword,
            TokenKind::Assign
            | TokenKind::Plus
            | TokenKind::Minus
            | TokenKind::Star
            | TokenKind::Slash
            | TokenKind::Percent
            | TokenKind::Power
            | TokenKind::LeftShift
            | TokenKind::RightShift
            | TokenKind::BitwiseAnd
            | TokenKind::BitwiseOr
            | TokenKind::BitwiseXor
            | TokenKind::BitwiseNot
            | TokenKind::PlusAssign
            | TokenKind::MinusAssign
            | TokenKind::StarAssign
            | TokenKind::SlashAssign
            | TokenKind::PercentAssign
            | TokenKind::DotAssign
            | TokenKind::AndAssign
            | TokenKind::OrAssign
            | TokenKind::XorAssign
            | TokenKind::PowerAssign
            | TokenKind::LeftShiftAssign
            | TokenKind::RightShiftAssign
            | TokenKind::LogicalAndAssign
            | TokenKind::LogicalOrAssign
            | TokenKind::DefinedOrAssign
            | TokenKind::Equal
            | TokenKind::NotEqual
            | TokenKind::Match
            | TokenKind::NotMatch
            | TokenKind::SmartMatch
            | TokenKind::Less
            | TokenKind::Greater
            | TokenKind::LessEqual
            | TokenKind::GreaterEqual
            | TokenKind::Spaceship
            | TokenKind::StringCompare
            | TokenKind::And
            | TokenKind::Or
            | TokenKind::Not
            | TokenKind::DefinedOr
            | TokenKind::WordAnd
            | TokenKind::WordOr
            | TokenKind::WordNot
            | TokenKind::WordXor
            | TokenKind::Arrow
            | TokenKind::FatArrow
            | TokenKind::Dot
            | TokenKind::Range
            | TokenKind::Ellipsis
            | TokenKind::Increment
            | TokenKind::Decrement
            | TokenKind::DoubleColon
            | TokenKind::Question
            | TokenKind::Colon
            | TokenKind::Backslash => TokenCategory::Operator,
            TokenKind::LeftParen
            | TokenKind::RightParen
            | TokenKind::LeftBrace
            | TokenKind::RightBrace
            | TokenKind::LeftBracket
            | TokenKind::RightBracket
            | TokenKind::Semicolon
            | TokenKind::Comma => TokenCategory::Delimiter,
            TokenKind::Number
            | TokenKind::String
            | TokenKind::Regex
            | TokenKind::Substitution
            | TokenKind::Transliteration
            | TokenKind::QuoteSingle
            | TokenKind::QuoteDouble
            | TokenKind::QuoteWords
            | TokenKind::QuoteCommand
            | TokenKind::HeredocStart
            | TokenKind::HeredocBody
            | TokenKind::FormatBody
            | TokenKind::DataMarker
            | TokenKind::DataBody
            | TokenKind::VString
            | TokenKind::UnknownRest
            | TokenKind::HeredocDepthLimit => TokenCategory::Literal,
            TokenKind::Identifier
            | TokenKind::ScalarSigil
            | TokenKind::ArraySigil
            | TokenKind::HashSigil
            | TokenKind::SubSigil
            | TokenKind::GlobSigil => TokenCategory::Identifier,
            TokenKind::Eof | TokenKind::Unknown => TokenCategory::Special,
        }
    }

    /// `true` for kinds categorised as `TokenCategory::Keyword`.
    pub const fn is_keyword(self) -> bool {
        matches!(self.category(), TokenCategory::Keyword)
    }

    /// `true` for kinds categorised as `TokenCategory::Operator`.
    pub const fn is_operator(self) -> bool {
        matches!(self.category(), TokenCategory::Operator)
    }

    /// `true` for kinds categorised as `TokenCategory::Literal`.
    pub const fn is_literal(self) -> bool {
        matches!(self.category(), TokenCategory::Literal)
    }

    /// `true` for kinds categorised as `TokenCategory::Delimiter`.
    pub const fn is_delimiter(self) -> bool {
        matches!(self.category(), TokenCategory::Delimiter)
    }

    /// `true` for kinds categorised as `TokenCategory::Identifier`.
    pub const fn is_identifier(self) -> bool {
        matches!(self.category(), TokenCategory::Identifier)
    }

    /// `true` for kinds categorised as `TokenCategory::Special`.
    pub const fn is_special(self) -> bool {
        matches!(self.category(), TokenCategory::Special)
    }

    /// `true` for plain `=` and every compound assignment operator.
    #[inline]
    pub const fn is_assignment_operator(self) -> bool {
        matches!(
            self,
            TokenKind::Assign
                | TokenKind::PlusAssign
                | TokenKind::MinusAssign
                | TokenKind::StarAssign
                | TokenKind::SlashAssign
                | TokenKind::PercentAssign
                | TokenKind::DotAssign
                | TokenKind::AndAssign
                | TokenKind::OrAssign
                | TokenKind::XorAssign
                | TokenKind::PowerAssign
                | TokenKind::LeftShiftAssign
                | TokenKind::RightShiftAssign
                | TokenKind::LogicalAndAssign
                | TokenKind::LogicalOrAssign
                | TokenKind::DefinedOrAssign
        )
    }

    /// `true` for equality, ordering, `cmp`, and the match operators
    /// (`=~`, `!~`, `~~`).
    #[inline]
    pub const fn is_comparison_operator(self) -> bool {
        matches!(
            self,
            TokenKind::Equal
                | TokenKind::NotEqual
                | TokenKind::Less
                | TokenKind::Greater
                | TokenKind::LessEqual
                | TokenKind::GreaterEqual
                | TokenKind::Spaceship
                | TokenKind::StringCompare
                | TokenKind::Match
                | TokenKind::NotMatch
                | TokenKind::SmartMatch
        )
    }

    /// `true` for the symbolic logical operators and their word forms.
    #[inline]
    pub const fn is_logical_operator(self) -> bool {
        matches!(
            self,
            TokenKind::And
                | TokenKind::Or
                | TokenKind::Not
                | TokenKind::DefinedOr
                | TokenKind::WordAnd
                | TokenKind::WordOr
                | TokenKind::WordNot
                | TokenKind::WordXor
        )
    }

    /// `true` for operators spelled as words: `cmp`, `and`, `or`, `not`, `xor`.
    #[inline]
    pub const fn is_word_operator(self) -> bool {
        matches!(
            self,
            TokenKind::StringCompare
                | TokenKind::WordAnd
                | TokenKind::WordOr
                | TokenKind::WordNot
                | TokenKind::WordXor
        )
    }

    /// `true` for `and`, `or`, `not`, `xor` (the word operators minus `cmp`).
    #[inline]
    pub const fn is_low_precedence_word_operator(self) -> bool {
        matches!(
            self,
            TokenKind::WordAnd | TokenKind::WordOr | TokenKind::WordNot | TokenKind::WordXor
        )
    }

    /// `true` for `(`, `{`, and `[`.
    #[inline]
    pub const fn is_open_delimiter(self) -> bool {
        matches!(self, TokenKind::LeftParen | TokenKind::LeftBrace | TokenKind::LeftBracket)
    }

    /// `true` for `)`, `}`, and `]`.
    #[inline]
    pub const fn is_close_delimiter(self) -> bool {
        matches!(self, TokenKind::RightParen | TokenKind::RightBrace | TokenKind::RightBracket)
    }

    /// The partner of a bracket kind (open maps to close and vice versa), or
    /// `None` for every non-bracket kind.
    #[inline]
    pub const fn matching_delimiter(self) -> Option<Self> {
        match self {
            TokenKind::LeftParen => Some(TokenKind::RightParen),
            TokenKind::RightParen => Some(TokenKind::LeftParen),
            TokenKind::LeftBrace => Some(TokenKind::RightBrace),
            TokenKind::RightBrace => Some(TokenKind::LeftBrace),
            TokenKind::LeftBracket => Some(TokenKind::RightBracket),
            TokenKind::RightBracket => Some(TokenKind::LeftBracket),
            _ => None,
        }
    }

    /// `true` for regex, substitution, transliteration, the `q`-family quote
    /// kinds, and heredoc starts.
    #[inline]
    pub const fn is_quote_like(self) -> bool {
        matches!(
            self,
            TokenKind::Regex
                | TokenKind::Substitution
                | TokenKind::Transliteration
                | TokenKind::QuoteSingle
                | TokenKind::QuoteDouble
                | TokenKind::QuoteWords
                | TokenKind::QuoteCommand
                | TokenKind::HeredocStart
        )
    }

    /// `true` for `;`, any closing bracket, and end of input.
    #[inline]
    pub const fn is_recovery_boundary(self) -> bool {
        // Pattern matching instead of `==` keeps this callable in const contexts.
        matches!(self, TokenKind::Semicolon | TokenKind::Eof) || self.is_close_delimiter()
    }

    /// Looks up the kind for an exact, case-sensitive keyword spelling.
    ///
    /// Also recognises the word operators `and`, `or`, `not`, `xor`, and
    /// `cmp`, which `category` classifies as operators rather than keywords.
    /// Returns `None` for any other spelling.
    pub fn from_keyword(spelling: &str) -> Option<TokenKind> {
        match spelling {
            "my" => Some(TokenKind::My),
            "our" => Some(TokenKind::Our),
            "local" => Some(TokenKind::Local),
            "state" => Some(TokenKind::State),
            "sub" => Some(TokenKind::Sub),
            "if" => Some(TokenKind::If),
            "elsif" => Some(TokenKind::Elsif),
            "else" => Some(TokenKind::Else),
            "unless" => Some(TokenKind::Unless),
            "while" => Some(TokenKind::While),
            "until" => Some(TokenKind::Until),
            "for" => Some(TokenKind::For),
            "foreach" => Some(TokenKind::Foreach),
            "return" => Some(TokenKind::Return),
            "package" => Some(TokenKind::Package),
            "use" => Some(TokenKind::Use),
            "no" => Some(TokenKind::No),
            "BEGIN" => Some(TokenKind::Begin),
            "END" => Some(TokenKind::End),
            "CHECK" => Some(TokenKind::Check),
            "INIT" => Some(TokenKind::Init),
            "UNITCHECK" => Some(TokenKind::Unitcheck),
            "eval" => Some(TokenKind::Eval),
            "do" => Some(TokenKind::Do),
            "given" => Some(TokenKind::Given),
            "when" => Some(TokenKind::When),
            "default" => Some(TokenKind::Default),
            "try" => Some(TokenKind::Try),
            "catch" => Some(TokenKind::Catch),
            "finally" => Some(TokenKind::Finally),
            "continue" => Some(TokenKind::Continue),
            "next" => Some(TokenKind::Next),
            "last" => Some(TokenKind::Last),
            "redo" => Some(TokenKind::Redo),
            "goto" => Some(TokenKind::Goto),
            "class" => Some(TokenKind::Class),
            "method" => Some(TokenKind::Method),
            "field" => Some(TokenKind::Field),
            "format" => Some(TokenKind::Format),
            "undef" => Some(TokenKind::Undef),
            "defer" => Some(TokenKind::Defer),
            "and" => Some(TokenKind::WordAnd),
            "or" => Some(TokenKind::WordOr),
            "not" => Some(TokenKind::WordNot),
            "xor" => Some(TokenKind::WordXor),
            "cmp" => Some(TokenKind::StringCompare),
            _ => None,
        }
    }

    /// Looks up the kind for an exact symbolic operator spelling, or `None`.
    ///
    /// `%`, `&`, and `*` resolve to their operator kinds here; `from_sigil`
    /// maps the same spellings to sigil kinds.
    pub fn from_operator(spelling: &str) -> Option<TokenKind> {
        match spelling {
            "=" => Some(TokenKind::Assign),
            "+" => Some(TokenKind::Plus),
            "-" => Some(TokenKind::Minus),
            "*" => Some(TokenKind::Star),
            "/" => Some(TokenKind::Slash),
            "%" => Some(TokenKind::Percent),
            "**" => Some(TokenKind::Power),
            "<<" => Some(TokenKind::LeftShift),
            ">>" => Some(TokenKind::RightShift),
            "&" => Some(TokenKind::BitwiseAnd),
            "|" => Some(TokenKind::BitwiseOr),
            "^" => Some(TokenKind::BitwiseXor),
            "~" => Some(TokenKind::BitwiseNot),
            "+=" => Some(TokenKind::PlusAssign),
            "-=" => Some(TokenKind::MinusAssign),
            "*=" => Some(TokenKind::StarAssign),
            "/=" => Some(TokenKind::SlashAssign),
            "%=" => Some(TokenKind::PercentAssign),
            ".=" => Some(TokenKind::DotAssign),
            "&=" => Some(TokenKind::AndAssign),
            "|=" => Some(TokenKind::OrAssign),
            "^=" => Some(TokenKind::XorAssign),
            "**=" => Some(TokenKind::PowerAssign),
            "<<=" => Some(TokenKind::LeftShiftAssign),
            ">>=" => Some(TokenKind::RightShiftAssign),
            "&&=" => Some(TokenKind::LogicalAndAssign),
            "||=" => Some(TokenKind::LogicalOrAssign),
            "//=" => Some(TokenKind::DefinedOrAssign),
            "==" => Some(TokenKind::Equal),
            "!=" => Some(TokenKind::NotEqual),
            "=~" => Some(TokenKind::Match),
            "!~" => Some(TokenKind::NotMatch),
            "~~" => Some(TokenKind::SmartMatch),
            "<" => Some(TokenKind::Less),
            ">" => Some(TokenKind::Greater),
            "<=" => Some(TokenKind::LessEqual),
            ">=" => Some(TokenKind::GreaterEqual),
            "<=>" => Some(TokenKind::Spaceship),
            "&&" => Some(TokenKind::And),
            "||" => Some(TokenKind::Or),
            "!" => Some(TokenKind::Not),
            "//" => Some(TokenKind::DefinedOr),
            "->" => Some(TokenKind::Arrow),
            "=>" => Some(TokenKind::FatArrow),
            "." => Some(TokenKind::Dot),
            ".." => Some(TokenKind::Range),
            "..." => Some(TokenKind::Ellipsis),
            "++" => Some(TokenKind::Increment),
            "--" => Some(TokenKind::Decrement),
            "::" => Some(TokenKind::DoubleColon),
            "?" => Some(TokenKind::Question),
            ":" => Some(TokenKind::Colon),
            "\\" => Some(TokenKind::Backslash),
            _ => None,
        }
    }

    /// Looks up the kind for a bracket, `;`, or `,` spelling, or `None`.
    pub fn from_delimiter(spelling: &str) -> Option<TokenKind> {
        match spelling {
            "(" => Some(TokenKind::LeftParen),
            ")" => Some(TokenKind::RightParen),
            "{" => Some(TokenKind::LeftBrace),
            "}" => Some(TokenKind::RightBrace),
            "[" => Some(TokenKind::LeftBracket),
            "]" => Some(TokenKind::RightBracket),
            ";" => Some(TokenKind::Semicolon),
            "," => Some(TokenKind::Comma),
            _ => None,
        }
    }

    /// Looks up the sigil kind for `$`, `@`, `%`, `&`, or `*`, or `None`.
    pub fn from_sigil(spelling: &str) -> Option<TokenKind> {
        match spelling {
            "$" => Some(TokenKind::ScalarSigil),
            "@" => Some(TokenKind::ArraySigil),
            "%" => Some(TokenKind::HashSigil),
            "&" => Some(TokenKind::SubSigil),
            "*" => Some(TokenKind::GlobSigil),
            _ => None,
        }
    }

    /// Human-readable name of the kind.
    ///
    /// Fixed spellings are wrapped in single quotes (`'sub'`, `'+='`); kinds
    /// without a single spelling get a descriptive phrase (`number`,
    /// `end of input`).
    pub const fn display_name(self) -> &'static str {
        match self {
            TokenKind::My => "'my'",
            TokenKind::Our => "'our'",
            TokenKind::Local => "'local'",
            TokenKind::State => "'state'",
            TokenKind::Sub => "'sub'",
            TokenKind::If => "'if'",
            TokenKind::Elsif => "'elsif'",
            TokenKind::Else => "'else'",
            TokenKind::Unless => "'unless'",
            TokenKind::While => "'while'",
            TokenKind::Until => "'until'",
            TokenKind::For => "'for'",
            TokenKind::Foreach => "'foreach'",
            TokenKind::Return => "'return'",
            TokenKind::Package => "'package'",
            TokenKind::Use => "'use'",
            TokenKind::No => "'no'",
            TokenKind::Begin => "'BEGIN'",
            TokenKind::End => "'END'",
            TokenKind::Check => "'CHECK'",
            TokenKind::Init => "'INIT'",
            TokenKind::Unitcheck => "'UNITCHECK'",
            TokenKind::Eval => "'eval'",
            TokenKind::Do => "'do'",
            TokenKind::Given => "'given'",
            TokenKind::When => "'when'",
            TokenKind::Default => "'default'",
            TokenKind::Try => "'try'",
            TokenKind::Catch => "'catch'",
            TokenKind::Finally => "'finally'",
            TokenKind::Continue => "'continue'",
            TokenKind::Next => "'next'",
            TokenKind::Last => "'last'",
            TokenKind::Redo => "'redo'",
            TokenKind::Goto => "'goto'",
            TokenKind::Class => "'class'",
            TokenKind::Method => "'method'",
            TokenKind::Field => "'field'",
            TokenKind::Format => "'format'",
            TokenKind::Undef => "'undef'",
            TokenKind::Defer => "'defer'",
            TokenKind::Assign => "'='",
            TokenKind::Plus => "'+'",
            TokenKind::Minus => "'-'",
            TokenKind::Star => "'*'",
            TokenKind::Slash => "'/'",
            TokenKind::Percent => "'%'",
            TokenKind::Power => "'**'",
            TokenKind::LeftShift => "'<<'",
            TokenKind::RightShift => "'>>'",
            TokenKind::BitwiseAnd => "'&'",
            TokenKind::BitwiseOr => "'|'",
            TokenKind::BitwiseXor => "'^'",
            TokenKind::BitwiseNot => "'~'",
            TokenKind::PlusAssign => "'+='",
            TokenKind::MinusAssign => "'-='",
            TokenKind::StarAssign => "'*='",
            TokenKind::SlashAssign => "'/='",
            TokenKind::PercentAssign => "'%='",
            TokenKind::DotAssign => "'.='",
            TokenKind::AndAssign => "'&='",
            TokenKind::OrAssign => "'|='",
            TokenKind::XorAssign => "'^='",
            TokenKind::PowerAssign => "'**='",
            TokenKind::LeftShiftAssign => "'<<='",
            TokenKind::RightShiftAssign => "'>>='",
            TokenKind::LogicalAndAssign => "'&&='",
            TokenKind::LogicalOrAssign => "'||='",
            TokenKind::DefinedOrAssign => "'//='",
            TokenKind::Equal => "'=='",
            TokenKind::NotEqual => "'!='",
            TokenKind::Match => "'=~'",
            TokenKind::NotMatch => "'!~'",
            TokenKind::SmartMatch => "'~~'",
            TokenKind::Less => "'<'",
            TokenKind::Greater => "'>'",
            TokenKind::LessEqual => "'<='",
            TokenKind::GreaterEqual => "'>='",
            TokenKind::Spaceship => "'<=>'",
            TokenKind::StringCompare => "'cmp'",
            TokenKind::And => "'&&'",
            TokenKind::Or => "'||'",
            TokenKind::Not => "'!'",
            TokenKind::DefinedOr => "'//'",
            TokenKind::WordAnd => "'and'",
            TokenKind::WordOr => "'or'",
            TokenKind::WordNot => "'not'",
            TokenKind::WordXor => "'xor'",
            TokenKind::Arrow => "'->'",
            TokenKind::FatArrow => "'=>'",
            TokenKind::Dot => "'.'",
            TokenKind::Range => "'..'",
            TokenKind::Ellipsis => "'...'",
            TokenKind::Increment => "'++'",
            TokenKind::Decrement => "'--'",
            TokenKind::DoubleColon => "'::'",
            TokenKind::Question => "'?'",
            TokenKind::Colon => "':'",
            TokenKind::Backslash => "'\\'",
            TokenKind::LeftParen => "'('",
            TokenKind::RightParen => "')'",
            TokenKind::LeftBrace => "'{'",
            TokenKind::RightBrace => "'}'",
            TokenKind::LeftBracket => "'['",
            TokenKind::RightBracket => "']'",
            TokenKind::Semicolon => "';'",
            TokenKind::Comma => "','",
            TokenKind::Number => "number",
            TokenKind::String => "string",
            TokenKind::Regex => "regex",
            TokenKind::Substitution => "substitution (s///)",
            TokenKind::Transliteration => "transliteration (tr///)",
            TokenKind::QuoteSingle => "q// string",
            TokenKind::QuoteDouble => "qq// string",
            TokenKind::QuoteWords => "qw() word list",
            TokenKind::QuoteCommand => "qx// command",
            TokenKind::HeredocStart => "heredoc (<<)",
            TokenKind::HeredocBody => "heredoc body",
            TokenKind::FormatBody => "format body",
            TokenKind::DataMarker => "data marker (__DATA__ or __END__)",
            TokenKind::DataBody => "data section body",
            TokenKind::VString => "version string",
            TokenKind::UnknownRest => "unparsed remainder",
            TokenKind::HeredocDepthLimit => "heredoc depth limit exceeded",
            TokenKind::Identifier => "identifier",
            TokenKind::ScalarSigil => "'$'",
            TokenKind::ArraySigil => "'@'",
            TokenKind::HashSigil => "'%'",
            TokenKind::SubSigil => "'&'",
            TokenKind::GlobSigil => "'*'",
            TokenKind::Eof => "end of input",
            TokenKind::Unknown => "unknown token",
        }
    }
}
// Table of token kinds backing `TokenKind::all()` and
// `TokenKind::metadata_count()`. Entries follow the declaration order of the
// `TokenKind` enum. The compiler checks that the initializer has exactly 132
// elements, but not that every variant appears, so a new variant must be added
// here (and the length bumped) by hand.
const TOKEN_KIND_ALL: [TokenKind; 132] = [
TokenKind::My,
TokenKind::Our,
TokenKind::Local,
TokenKind::State,
TokenKind::Sub,
TokenKind::If,
TokenKind::Elsif,
TokenKind::Else,
TokenKind::Unless,
TokenKind::While,
TokenKind::Until,
TokenKind::For,
TokenKind::Foreach,
TokenKind::Return,
TokenKind::Package,
TokenKind::Use,
TokenKind::No,
TokenKind::Begin,
TokenKind::End,
TokenKind::Check,
TokenKind::Init,
TokenKind::Unitcheck,
TokenKind::Eval,
TokenKind::Do,
TokenKind::Given,
TokenKind::When,
TokenKind::Default,
TokenKind::Try,
TokenKind::Catch,
TokenKind::Finally,
TokenKind::Continue,
TokenKind::Next,
TokenKind::Last,
TokenKind::Redo,
TokenKind::Goto,
TokenKind::Class,
TokenKind::Method,
TokenKind::Field,
TokenKind::Format,
TokenKind::Undef,
TokenKind::Defer,
TokenKind::Assign,
TokenKind::Plus,
TokenKind::Minus,
TokenKind::Star,
TokenKind::Slash,
TokenKind::Percent,
TokenKind::Power,
TokenKind::LeftShift,
TokenKind::RightShift,
TokenKind::BitwiseAnd,
TokenKind::BitwiseOr,
TokenKind::BitwiseXor,
TokenKind::BitwiseNot,
TokenKind::PlusAssign,
TokenKind::MinusAssign,
TokenKind::StarAssign,
TokenKind::SlashAssign,
TokenKind::PercentAssign,
TokenKind::DotAssign,
TokenKind::AndAssign,
TokenKind::OrAssign,
TokenKind::XorAssign,
TokenKind::PowerAssign,
TokenKind::LeftShiftAssign,
TokenKind::RightShiftAssign,
TokenKind::LogicalAndAssign,
TokenKind::LogicalOrAssign,
TokenKind::DefinedOrAssign,
TokenKind::Equal,
TokenKind::NotEqual,
TokenKind::Match,
TokenKind::NotMatch,
TokenKind::SmartMatch,
TokenKind::Less,
TokenKind::Greater,
TokenKind::LessEqual,
TokenKind::GreaterEqual,
TokenKind::Spaceship,
TokenKind::StringCompare,
TokenKind::And,
TokenKind::Or,
TokenKind::Not,
TokenKind::DefinedOr,
TokenKind::WordAnd,
TokenKind::WordOr,
TokenKind::WordNot,
TokenKind::WordXor,
TokenKind::Arrow,
TokenKind::FatArrow,
TokenKind::Dot,
TokenKind::Range,
TokenKind::Ellipsis,
TokenKind::Increment,
TokenKind::Decrement,
TokenKind::DoubleColon,
TokenKind::Question,
TokenKind::Colon,
TokenKind::Backslash,
TokenKind::LeftParen,
TokenKind::RightParen,
TokenKind::LeftBrace,
TokenKind::RightBrace,
TokenKind::LeftBracket,
TokenKind::RightBracket,
TokenKind::Semicolon,
TokenKind::Comma,
TokenKind::Number,
TokenKind::String,
TokenKind::Regex,
TokenKind::Substitution,
TokenKind::Transliteration,
TokenKind::QuoteSingle,
TokenKind::QuoteDouble,
TokenKind::QuoteWords,
TokenKind::QuoteCommand,
TokenKind::HeredocStart,
TokenKind::HeredocBody,
TokenKind::FormatBody,
TokenKind::DataMarker,
TokenKind::DataBody,
TokenKind::VString,
TokenKind::UnknownRest,
TokenKind::HeredocDepthLimit,
TokenKind::Identifier,
TokenKind::ScalarSigil,
TokenKind::ArraySigil,
TokenKind::HashSigil,
TokenKind::SubSigil,
TokenKind::GlobSigil,
TokenKind::Eof,
TokenKind::Unknown,
];
// Unit tests for the token model: span construction and validation, owned and
// borrowed token constructors/accessors, spelling-to-kind lookups, category
// classification, display names, and the `all()` table.
#[cfg(test)]
mod tests {
use super::*;
// --- TokenSpan ---
#[test]
fn token_span_new_and_accessors() {
let span = TokenSpan::new(5, 10);
assert_eq!(span.start, 5);
assert_eq!(span.end, 10);
assert_eq!(span.len(), 5);
assert!(!span.is_empty());
assert_eq!(span.range(), 5..10);
}
#[test]
fn token_span_is_empty_when_zero_length() {
let span = TokenSpan::new(3, 3);
assert!(span.is_empty());
assert_eq!(span.len(), 0);
}
#[test]
fn token_span_try_new_ok() {
let span = TokenSpan::try_new(0, 5).unwrap();
assert_eq!(span.start, 0);
assert_eq!(span.end, 5);
}
#[test]
fn token_span_try_new_end_before_start_errors() {
let err = TokenSpan::try_new(10, 5).unwrap_err();
assert_eq!(err, TokenSpanError::EndBeforeStart { start: 10, end: 5 });
}
// --- TokenSpanError display ---
#[test]
fn token_span_error_display_end_before_start() {
let err = TokenSpanError::EndBeforeStart { start: 10, end: 5 };
let msg = err.to_string();
assert!(msg.contains("10"));
assert!(msg.contains("5"));
}
#[test]
fn token_span_error_display_empty_span_not_allowed() {
let err = TokenSpanError::EmptySpanNotAllowed { kind: TokenKind::Identifier, at: 7 };
let msg = err.to_string();
assert!(msg.contains("Identifier"));
assert!(msg.contains("7"));
}
// --- Token (owned) ---
#[test]
fn token_new_stores_fields() {
let tok = Token::new(TokenKind::My, "my", 0, 2);
assert_eq!(tok.kind, TokenKind::My);
assert_eq!(&*tok.text, "my");
assert_eq!(tok.start, 0);
assert_eq!(tok.end, 2);
}
#[test]
fn token_len_and_is_empty() {
let tok = Token::new(TokenKind::Identifier, "foo", 10, 13);
assert_eq!(tok.len(), 3);
assert!(!tok.is_empty());
let eof = Token::eof_at(8);
assert_eq!(eof.len(), 0);
assert!(eof.is_empty());
}
#[test]
fn token_span_and_range() {
let tok = Token::new(TokenKind::Number, "42", 5, 7);
assert_eq!(tok.span(), TokenSpan::new(5, 7));
assert_eq!(tok.range(), 5..7);
}
#[test]
fn token_try_new_rejects_end_before_start() {
let err = Token::try_new(TokenKind::Identifier, "x", 10, 5).unwrap_err();
assert_eq!(err, TokenSpanError::EndBeforeStart { start: 10, end: 5 });
}
#[test]
fn token_new_checked_rejects_empty_non_eof() {
let err = Token::new_checked(TokenKind::Identifier, "", 5, 5).unwrap_err();
assert!(matches!(
err,
TokenSpanError::EmptySpanNotAllowed { kind: TokenKind::Identifier, at: 5 }
));
}
#[test]
fn token_new_checked_allows_empty_eof() {
let tok = Token::new_checked(TokenKind::Eof, "", 5, 5).unwrap();
assert_eq!(tok.kind, TokenKind::Eof);
assert_eq!(tok.start, 5);
}
#[test]
fn token_eof_at() {
let eof = Token::eof_at(42);
assert_eq!(eof.kind, TokenKind::Eof);
assert_eq!(eof.start, 42);
assert_eq!(eof.end, 42);
assert!(eof.is_empty());
}
// An `end` smaller than `start` is expected to be raised to `start`.
#[test]
fn token_unknown_at_normalises_inverted_span() {
let tok = Token::unknown_at("?", 5, 3); assert_eq!(tok.kind, TokenKind::Unknown);
assert_eq!(tok.start, 5);
assert_eq!(tok.end, 5); }
#[test]
fn token_with_kind() {
let tok = Token::new(TokenKind::Identifier, "sub", 0, 3);
let retyped = tok.with_kind(TokenKind::Sub);
assert_eq!(retyped.kind, TokenKind::Sub);
assert_eq!(&*retyped.text, "sub");
assert_eq!(retyped.start, 0);
assert_eq!(retyped.end, 3);
}
#[test]
fn token_with_span_ok() {
let tok = Token::new(TokenKind::String, "hello", 0, 5);
let moved = tok.with_span(10, 15).unwrap();
assert_eq!(moved.start, 10);
assert_eq!(moved.end, 15);
}
#[test]
fn token_display_name_delegates_to_kind() {
let tok = Token::new(TokenKind::LeftBrace, "{", 0, 1);
assert_eq!(tok.display_name(), "'{'");
}
// --- Token <-> TokenRef conversions ---
#[test]
fn token_as_ref_token_round_trip() {
let tok = Token::new(TokenKind::Sub, "sub", 0, 3);
let tok_ref = tok.as_ref_token();
assert_eq!(tok_ref.kind, TokenKind::Sub);
assert_eq!(tok_ref.text, "sub");
assert_eq!(tok_ref.start, 0);
assert_eq!(tok_ref.end, 3);
let owned: Token = tok_ref.into();
assert_eq!(owned.kind, TokenKind::Sub);
assert_eq!(&*owned.text, "sub");
}
#[test]
fn token_ref_accessors() {
let r = TokenRef::new(TokenKind::Number, "99", 4, 6);
assert_eq!(r.len(), 2);
assert!(!r.is_empty());
assert_eq!(r.span(), (4, 6));
assert_eq!(r.display_name(), "number");
}
#[test]
fn token_ref_to_owned_token() {
let r = TokenRef::new(TokenKind::Identifier, "foo", 1, 4);
let owned = r.to_owned_token();
assert_eq!(owned.kind, TokenKind::Identifier);
assert_eq!(&*owned.text, "foo");
}
// --- Spelling lookups ---
#[test]
fn from_keyword_recognises_perl_keywords() {
assert_eq!(TokenKind::from_keyword("my"), Some(TokenKind::My));
assert_eq!(TokenKind::from_keyword("sub"), Some(TokenKind::Sub));
assert_eq!(TokenKind::from_keyword("if"), Some(TokenKind::If));
assert_eq!(TokenKind::from_keyword("elsif"), Some(TokenKind::Elsif));
assert_eq!(TokenKind::from_keyword("else"), Some(TokenKind::Else));
assert_eq!(TokenKind::from_keyword("while"), Some(TokenKind::While));
assert_eq!(TokenKind::from_keyword("for"), Some(TokenKind::For));
assert_eq!(TokenKind::from_keyword("foreach"), Some(TokenKind::Foreach));
assert_eq!(TokenKind::from_keyword("return"), Some(TokenKind::Return));
assert_eq!(TokenKind::from_keyword("package"), Some(TokenKind::Package));
assert_eq!(TokenKind::from_keyword("use"), Some(TokenKind::Use));
assert_eq!(TokenKind::from_keyword("BEGIN"), Some(TokenKind::Begin));
assert_eq!(TokenKind::from_keyword("END"), Some(TokenKind::End));
assert_eq!(TokenKind::from_keyword("eval"), Some(TokenKind::Eval));
assert_eq!(TokenKind::from_keyword("class"), Some(TokenKind::Class));
assert_eq!(TokenKind::from_keyword("defer"), Some(TokenKind::Defer));
assert_eq!(TokenKind::from_keyword("and"), Some(TokenKind::WordAnd));
assert_eq!(TokenKind::from_keyword("or"), Some(TokenKind::WordOr));
assert_eq!(TokenKind::from_keyword("not"), Some(TokenKind::WordNot));
assert_eq!(TokenKind::from_keyword("xor"), Some(TokenKind::WordXor));
assert_eq!(TokenKind::from_keyword("cmp"), Some(TokenKind::StringCompare));
}
// Lookup is case-sensitive, so wrong-case spellings must not match.
#[test]
fn from_keyword_unknown_returns_none() {
assert_eq!(TokenKind::from_keyword("MY"), None);
assert_eq!(TokenKind::from_keyword("Sub"), None);
assert_eq!(TokenKind::from_keyword("unknown"), None);
assert_eq!(TokenKind::from_keyword(""), None);
}
#[test]
fn from_operator_recognises_operators() {
assert_eq!(TokenKind::from_operator("="), Some(TokenKind::Assign));
assert_eq!(TokenKind::from_operator("+"), Some(TokenKind::Plus));
assert_eq!(TokenKind::from_operator("**"), Some(TokenKind::Power));
assert_eq!(TokenKind::from_operator("->"), Some(TokenKind::Arrow));
assert_eq!(TokenKind::from_operator("=>"), Some(TokenKind::FatArrow));
assert_eq!(TokenKind::from_operator("<=>"), Some(TokenKind::Spaceship));
assert_eq!(TokenKind::from_operator("//="), Some(TokenKind::DefinedOrAssign));
assert_eq!(TokenKind::from_operator("..."), Some(TokenKind::Ellipsis));
assert_eq!(TokenKind::from_operator("~~"), Some(TokenKind::SmartMatch));
}
#[test]
fn from_operator_unknown_returns_none() {
assert_eq!(TokenKind::from_operator(""), None);
assert_eq!(TokenKind::from_operator("xyz"), None);
}
#[test]
fn from_delimiter_recognises_all() {
assert_eq!(TokenKind::from_delimiter("("), Some(TokenKind::LeftParen));
assert_eq!(TokenKind::from_delimiter(")"), Some(TokenKind::RightParen));
assert_eq!(TokenKind::from_delimiter("{"), Some(TokenKind::LeftBrace));
assert_eq!(TokenKind::from_delimiter("}"), Some(TokenKind::RightBrace));
assert_eq!(TokenKind::from_delimiter("["), Some(TokenKind::LeftBracket));
assert_eq!(TokenKind::from_delimiter("]"), Some(TokenKind::RightBracket));
assert_eq!(TokenKind::from_delimiter(";"), Some(TokenKind::Semicolon));
assert_eq!(TokenKind::from_delimiter(","), Some(TokenKind::Comma));
assert_eq!(TokenKind::from_delimiter("x"), None);
}
#[test]
fn from_sigil_recognises_all() {
assert_eq!(TokenKind::from_sigil("$"), Some(TokenKind::ScalarSigil));
assert_eq!(TokenKind::from_sigil("@"), Some(TokenKind::ArraySigil));
assert_eq!(TokenKind::from_sigil("%"), Some(TokenKind::HashSigil));
assert_eq!(TokenKind::from_sigil("&"), Some(TokenKind::SubSigil));
assert_eq!(TokenKind::from_sigil("*"), Some(TokenKind::GlobSigil));
assert_eq!(TokenKind::from_sigil("!"), None);
}
// --- Category classification ---
#[test]
fn category_keyword_variants() {
assert_eq!(TokenKind::My.category(), TokenCategory::Keyword);
assert_eq!(TokenKind::Sub.category(), TokenCategory::Keyword);
assert_eq!(TokenKind::Defer.category(), TokenCategory::Keyword);
}
#[test]
fn category_operator_variants() {
assert_eq!(TokenKind::Plus.category(), TokenCategory::Operator);
assert_eq!(TokenKind::Spaceship.category(), TokenCategory::Operator);
assert_eq!(TokenKind::WordAnd.category(), TokenCategory::Operator);
}
#[test]
fn category_delimiter_variants() {
assert_eq!(TokenKind::LeftParen.category(), TokenCategory::Delimiter);
assert_eq!(TokenKind::Comma.category(), TokenCategory::Delimiter);
}
#[test]
fn category_literal_variants() {
assert_eq!(TokenKind::Number.category(), TokenCategory::Literal);
assert_eq!(TokenKind::HeredocStart.category(), TokenCategory::Literal);
assert_eq!(TokenKind::DataMarker.category(), TokenCategory::Literal);
}
#[test]
fn category_identifier_variants() {
assert_eq!(TokenKind::Identifier.category(), TokenCategory::Identifier);
assert_eq!(TokenKind::ScalarSigil.category(), TokenCategory::Identifier);
assert_eq!(TokenKind::GlobSigil.category(), TokenCategory::Identifier);
}
#[test]
fn category_special_variants() {
assert_eq!(TokenKind::Eof.category(), TokenCategory::Special);
assert_eq!(TokenKind::Unknown.category(), TokenCategory::Special);
}
// --- Display names and metadata ---
#[test]
fn display_name_selected_variants() {
assert_eq!(TokenKind::LeftBrace.display_name(), "'{'");
assert_eq!(TokenKind::RightBrace.display_name(), "'}'");
assert_eq!(TokenKind::Identifier.display_name(), "identifier");
assert_eq!(TokenKind::Eof.display_name(), "end of input");
assert_eq!(TokenKind::Number.display_name(), "number");
assert_eq!(TokenKind::Sub.display_name(), "'sub'");
assert_eq!(TokenKind::Semicolon.display_name(), "';'");
assert_eq!(TokenKind::HeredocStart.display_name(), "heredoc (<<)");
assert_eq!(TokenKind::DataMarker.display_name(), "data marker (__DATA__ or __END__)");
}
// Pins the table size; must be updated together with `TOKEN_KIND_ALL`.
#[test]
fn all_returns_132_variants() {
assert_eq!(TokenKind::all().len(), 132);
assert_eq!(TokenKind::metadata_count(), 132);
}
#[test]
fn metadata_round_trips_through_kind() {
let m = TokenKind::Sub.metadata();
assert_eq!(m.category, TokenCategory::Keyword);
assert_eq!(m.display_name, "'sub'");
}
}