use alloc_from_pool::{Factory as PoolFactory, PoolValue};
use crate::lexer::*;
use crate::maybe_byte::*;
use crate::source::buffer::*;
use crate::source::Comment;
use crate::source::Decoder;
use crate::source::MagicComment;
use crate::str_term::{str_types::*, HeredocEnd, StrTerm, StringLiteral};
use crate::Loc;
use crate::SharedContext;
use crate::StackState;
use crate::StaticEnvironment;
use crate::Token;
use crate::TokenBuf;
use crate::{error::Diagnostics, Bytes};
use crate::{lex_states::*, LexState};
use crate::{Diagnostic, DiagnosticMessage, ErrorLevel};
#[derive(Debug, Default)]
pub struct Lexer {
pub(crate) buffer: Buffer,
pub(crate) lval: Option<Bytes>,
pub(crate) lval_start: Option<usize>,
pub(crate) lval_end: Option<usize>,
pub(crate) strterm: Option<Box<StrTerm>>,
pub lex_state: LexState,
pub(crate) paren_nest: i32,
pub(crate) lpar_beg: i32,
pub(crate) brace_nest: i32,
pub cond: StackState,
pub cmdarg: StackState,
pub(crate) tokenbuf: TokenBuf,
pub(crate) context: SharedContext,
pub(crate) command_start: bool,
pub(crate) token_seen: bool,
pub static_env: StaticEnvironment,
pub(crate) diagnostics: Diagnostics,
pub(crate) comments: Vec<Comment>,
pub(crate) magic_comments: Vec<MagicComment>,
#[doc(hidden)]
pub tokens_factory: PoolFactory<Token>,
}
impl Lexer {
pub(crate) const NULL_CHAR: u8 = 0x00;
pub(crate) const CTRL_D_CHAR: u8 = 0x04;
pub(crate) const CTRL_Z_CHAR: u8 = 0x1a;
pub(crate) const LF_CHAR: u8 = 0x0c;
pub(crate) const VTAB_CHAR: u8 = 0x0b;
pub fn new<Bytes, Name>(bytes: Bytes, name: Name, decoder: Option<Decoder>) -> Self
where
Bytes: Into<Vec<u8>>,
Name: Into<String>,
{
Self {
cond: StackState::new("cond"),
cmdarg: StackState::new("cmdarg"),
lpar_beg: -1,
buffer: Buffer::new(name.into(), bytes.into(), decoder),
..Self::default()
}
}
pub fn tokenize_until_eof(&mut self) -> Vec<Token> {
let mut tokens = vec![];
loop {
let token = self.yylex().take_value();
match token.token_type {
Self::END_OF_INPUT => break,
_ => tokens.push(token),
}
}
tokens
}
pub(crate) fn yylex(&mut self) -> PoolValue<Token> {
let lex_state_before = self.lex_state;
self.lval = None;
let token_type = self.parser_yylex();
let begin = std::mem::take(&mut self.lval_start).unwrap_or(self.buffer.ptok);
let mut end = std::mem::take(&mut self.lval_end).unwrap_or(self.buffer.pcur);
let mut token_value = self
.lval
.take()
.or_else(|| {
self.buffer
.substr_at(begin, end)
.map(|s| Bytes::new(Vec::from(s)))
})
.unwrap_or_else(|| Bytes::new(vec![]));
if token_type == Self::tNL {
token_value = Bytes::new(vec![b'\n']);
end = begin + 1;
}
let token = self.tokens_factory.alloc(Token {
token_type,
token_value,
loc: Loc { begin, end },
lex_state_before,
lex_state_after: self.lex_state,
});
println_if_debug_lexer!(
"yylex ({:?}, {:?}, {:?})",
token.token_name(),
token.token_value,
token.loc
);
token
}
pub(crate) fn nextc(&mut self) -> MaybeByte {
self.buffer.nextc()
}
pub(crate) fn char_at(&self, idx: usize) -> MaybeByte {
self.buffer.byte_at(idx)
}
pub(crate) fn token_flush(&mut self) {
self.buffer.token_flush()
}
pub(crate) fn parser_yylex(&mut self) -> i32 {
let mut c: MaybeByte;
let mut space_seen: bool = false;
let label: usize;
let mut last_state: LexState;
let token_seen = self.token_seen;
if let Some(strterm) = self.strterm.as_ref().map(|i| i.as_ref()) {
match strterm {
StrTerm::HeredocLiteral(_) => {
return self.here_document();
}
StrTerm::StringLiteral(_) => {
self.token_flush();
return self.parse_string();
}
}
}
let cmd_state = self.command_start;
self.command_start = false;
self.token_seen = true;
'retrying: loop {
last_state = self.lex_state;
self.token_flush();
c = self.nextc();
if c.is_eof() {
return Self::END_OF_INPUT;
}
match c.as_option() {
None
| Some(Self::NULL_CHAR)
| Some(Self::CTRL_D_CHAR)
| Some(Self::CTRL_Z_CHAR) => return Self::END_OF_INPUT,
Some(b'\r') => {
if !self.buffer.cr_seen {
self.buffer.cr_seen = true;
self.warn(
DiagnosticMessage::SlashRAtMiddleOfLine {},
self.current_loc(),
);
}
}
Some(b' ') | Some(b'\t') | Some(Self::LF_CHAR) | Some(Self::VTAB_CHAR) => {
space_seen = true;
continue 'retrying;
}
Some(b'#') | Some(b'\n') => {
if c == b'#' {
self.token_seen = token_seen;
let magic_comment = self
.magic_comment(self.buffer.pcur, self.buffer.pend - self.buffer.pcur);
match magic_comment {
Ok(magic_comment) => {
if !magic_comment && self.comment_at_top() {
self.set_file_encoding(self.buffer.pcur, self.buffer.pend)
}
}
Err(_) => return Self::END_OF_INPUT,
}
self.buffer.goto_eol();
self.comments
.push(Comment::new(self.current_loc(), &self.buffer.input.decoded))
}
self.token_seen = token_seen;
let cc = self
.lex_state
.is_some(EXPR_BEG | EXPR_CLASS | EXPR_FNAME | EXPR_DOT)
&& !self.lex_state.is_some(EXPR_LABELED);
if cc || self.lex_state.is_all(EXPR_ARG | EXPR_LABELED) {
if !cc && self.context.in_kwarg() {
return self.normal_newline_leaf_label();
}
continue 'retrying;
}
loop {
c = self.nextc();
#[allow(clippy::never_loop)]
loop {
if c == b' '
|| c == b'\t'
|| c == Self::LF_CHAR
|| c == b'\r'
|| c == Self::VTAB_CHAR
{
space_seen = true;
break;
}
if c == b'#' {
self.buffer.pushback(c);
continue 'retrying;
}
if c == b'&' || c == b'.' {
if self.buffer.peek(b'.') == (c == b'&') {
self.buffer.pushback(c);
continue 'retrying;
}
}
if c.is_eof() {
self.buffer.eof_no_decrement();
return self.normal_newline_leaf_label();
}
self.buffer.ruby_sourceline -= 1;
self.buffer.nextline = self.buffer.lastline;
self.buffer.eof_no_decrement();
return self.normal_newline_leaf_label();
}
}
}
Some(b'*') => {
let result: i32;
c = self.nextc();
if c == b'*' {
c = self.nextc();
if c == b'=' {
self.set_yylval_id("**=");
self.lex_state.set(EXPR_BEG);
return Self::tOP_ASGN;
}
self.buffer.pushback(c);
if self.lex_state.is_spacearg(c, space_seen) {
self.warn(
DiagnosticMessage::DStarInterpretedAsArgPrefix {},
self.current_loc(),
);
result = Self::tDSTAR;
} else if self.lex_state.is_beg() {
result = Self::tDSTAR;
} else {
result = self.warn_balanced(
Self::tPOW,
"**",
"argument prefix",
c,
space_seen,
last_state,
);
}
} else {
if c == b'=' {
self.set_yylval_id("*=");
self.lex_state.set(EXPR_BEG);
return Self::tOP_ASGN;
}
self.buffer.pushback(c);
if self.lex_state.is_spacearg(c, space_seen) {
self.warn(
DiagnosticMessage::StarInterpretedAsArgPrefix {},
self.current_loc(),
);
result = Self::tSTAR;
} else if self.lex_state.is_beg() {
result = Self::tSTAR;
} else {
result = self.warn_balanced(
Self::tSTAR2,
"*",
"argument prefix",
c,
space_seen,
last_state,
);
}
}
self.lex_state.set(if self.lex_state.is_after_operator() {
EXPR_ARG
} else {
EXPR_BEG
});
return result;
}
Some(b'!') => {
c = self.nextc();
if self.lex_state.is_after_operator() {
self.lex_state.set(EXPR_ARG);
if c == b'@' {
return Self::tBANG;
}
} else {
self.lex_state.set(EXPR_BEG);
}
if c == b'=' {
return Self::tNEQ;
}
if c == b'~' {
return Self::tNMATCH;
}
self.buffer.pushback(c);
return Self::tBANG;
}
Some(b'=') => {
if self.buffer.was_bol() {
if self.buffer.is_word_match("begin") {
let begin_loc = self.loc(self.buffer.pcur - 1, self.buffer.pcur + 5);
self.buffer.goto_eol();
loop {
self.buffer.goto_eol();
c = self.nextc();
if c.is_eof() {
self.compile_error(
DiagnosticMessage::EmbeddedDocumentMeetsEof {},
begin_loc,
);
return Self::END_OF_INPUT;
}
if c == b'=' && self.buffer.is_word_match("end") {
break;
}
self.buffer.pushback(c);
}
self.buffer.goto_eol();
self.comments.push(Comment::new(
begin_loc.with_end(self.buffer.pcur),
&self.buffer.input.decoded,
));
continue 'retrying;
}
}
self.lex_state.set(if self.lex_state.is_after_operator() {
EXPR_ARG
} else {
EXPR_BEG
});
c = self.nextc();
if c == b'=' {
c = self.nextc();
if c == b'=' {
return Self::tEQQ;
}
self.buffer.pushback(c);
return Self::tEQ;
}
if c == b'~' {
return Self::tMATCH;
} else if c == b'>' {
return Self::tASSOC;
}
self.buffer.pushback(c);
return Self::tEQL;
}
Some(b'<') => {
c = self.nextc();
if c == b'<'
&& !self.lex_state.is_some(EXPR_DOT | EXPR_CLASS)
&& !self.lex_state.is_end()
&& (!self.lex_state.is_arg()
|| self.lex_state.is_some(EXPR_LABELED)
|| space_seen)
{
if let Some(token) = self.heredoc_identifier() {
return token;
}
}
if self.lex_state.is_after_operator() {
self.lex_state.set(EXPR_ARG);
} else {
if self.lex_state.is_some(EXPR_CLASS) {
self.command_start = true;
}
self.lex_state.set(EXPR_BEG);
}
if c == b'=' {
c = self.nextc();
if c == b'>' {
return Self::tCMP;
}
self.buffer.pushback(c);
return Self::tLEQ;
}
if c == b'<' {
c = self.nextc();
if c == b'=' {
self.set_yylval_id("<<=");
self.lex_state.set(EXPR_BEG);
return Self::tOP_ASGN;
}
self.buffer.pushback(c);
return self.warn_balanced(
Self::tLSHFT,
"<<",
"here document",
c,
space_seen,
last_state,
);
}
self.buffer.pushback(c);
return Self::tLT;
}
Some(b'>') => {
self.lex_state.set(if self.lex_state.is_after_operator() {
EXPR_ARG
} else {
EXPR_BEG
});
c = self.nextc();
if c == b'=' {
return Self::tGEQ;
}
if c == b'>' {
c = self.nextc();
if c == b'=' {
self.set_yylval_id(">>=");
self.lex_state.set(EXPR_BEG);
return Self::tOP_ASGN;
}
self.buffer.pushback(c);
return Self::tRSHFT;
}
self.buffer.pushback(c);
return Self::tGT;
}
Some(b'"') => {
label = if self.lex_state.is_label_possible(cmd_state) {
str_label
} else {
0
};
self.strterm = self.new_strterm(str_dquote | label, b'"', None, None);
self.buffer.set_ptok(self.buffer.pcur - 1);
return Self::tSTRING_BEG;
}
Some(b'`') => {
if self.lex_state.is_some(EXPR_FNAME) {
self.lex_state.set(EXPR_ENDFN);
return Self::tBACK_REF2;
}
if self.lex_state.is_some(EXPR_DOT) {
if cmd_state {
self.lex_state.set(EXPR_CMDARG);
} else {
self.lex_state.set(EXPR_ARG);
}
return Self::tBACK_REF2;
}
self.strterm = self.new_strterm(str_xquote, b'`', None, None);
return Self::tXSTRING_BEG;
}
Some(b'\'') => {
label = if self.lex_state.is_label_possible(cmd_state) {
str_label
} else {
0
};
self.strterm = self.new_strterm(str_squote | label, b'\'', None, None);
self.buffer.set_ptok(self.buffer.pcur - 1);
return Self::tSTRING_BEG;
}
Some(b'?') => {
return self.parse_qmark(space_seen).unwrap_or(-1);
}
Some(b'&') => {
let result: i32;
c = self.nextc();
if c == b'&' {
self.lex_state.set(EXPR_BEG);
c = self.nextc();
if c == b'=' {
self.set_yylval_id("&&=");
self.lex_state.set(EXPR_BEG);
return Self::tOP_ASGN;
}
self.buffer.pushback(c);
return Self::tANDOP;
} else if c == b'=' {
self.set_yylval_id("&=");
self.lex_state.set(EXPR_BEG);
return Self::tOP_ASGN;
} else if c == b'.' {
self.set_yylval_id("&.");
self.lex_state.set(EXPR_DOT);
return Self::tANDDOT;
}
self.buffer.pushback(c);
if self.lex_state.is_spacearg(c, space_seen) {
if c != b':'
|| {
c = self.buffer.peekc_n(1);
!c.is_eof()
}
|| !(c == b'\''
|| c == b'"'
|| self
.buffer
.is_identchar(self.buffer.pcur + 1, self.buffer.pend))
{
self.warn(
DiagnosticMessage::AmpersandInterpretedAsArgPrefix {},
self.current_loc(),
);
}
result = Self::tAMPER;
} else if self.lex_state.is_beg() {
result = Self::tAMPER;
} else {
result = self.warn_balanced(
Self::tAMPER2,
"&",
"argument prefix",
c,
space_seen,
last_state,
);
}
self.lex_state.set(if self.lex_state.is_after_operator() {
EXPR_ARG
} else {
EXPR_BEG
});
return result;
}
Some(b'|') => {
c = self.nextc();
if c == b'|' {
self.lex_state.set(EXPR_BEG);
c = self.nextc();
if c == b'=' {
self.set_yylval_id("||=");
self.lex_state.set(EXPR_BEG);
return Self::tOP_ASGN;
}
self.buffer.pushback(c);
if last_state.is_some(EXPR_BEG) {
self.buffer.pushback(b'|');
return Self::tPIPE;
}
return Self::tOROP;
}
if c == b'=' {
self.set_yylval_id("|=");
self.lex_state.set(EXPR_BEG);
return Self::tOP_ASGN;
}
self.lex_state.set(if self.lex_state.is_after_operator() {
EXPR_ARG
} else {
EXPR_BEG | EXPR_LABEL
});
self.buffer.pushback(c);
return Self::tPIPE;
}
Some(b'+') => {
c = self.nextc();
if self.lex_state.is_after_operator() {
self.lex_state.set(EXPR_ARG);
if c == b'@' {
return Self::tUPLUS;
}
self.buffer.pushback(c);
return Self::tPLUS;
}
if c == b'=' {
self.set_yylval_id("+=");
self.lex_state.set(EXPR_BEG);
return Self::tOP_ASGN;
}
if self.lex_state.is_beg()
|| (self.lex_state.is_spacearg(c, space_seen)
&& self.arg_ambiguous(b'+', self.current_loc().adjust_end(-1)))
{
self.lex_state.set(EXPR_BEG);
self.buffer.pushback(c);
if !c.is_eof() && c.is_digit() {
return self.parse_numeric(b'+');
}
return Self::tUPLUS;
}
self.lex_state.set(EXPR_BEG);
self.buffer.pushback(c);
return self.warn_balanced(
Self::tPLUS,
"+",
"unary operator",
c,
space_seen,
last_state,
);
}
Some(b'-') => {
c = self.nextc();
if self.lex_state.is_after_operator() {
self.lex_state.set(EXPR_ARG);
if c == b'@' {
return Self::tUMINUS;
}
self.buffer.pushback(c);
return Self::tMINUS;
}
if c == b'=' {
self.set_yylval_id("-=");
self.lex_state.set(EXPR_BEG);
return Self::tOP_ASGN;
}
if c == b'>' {
self.lex_state.set(EXPR_ENDFN);
return Self::tLAMBDA;
}
if self.lex_state.is_beg()
|| (self.lex_state.is_spacearg(c, space_seen)
&& self.arg_ambiguous(b'-', self.current_loc().adjust_end(-1)))
{
self.lex_state.set(EXPR_BEG);
self.buffer.pushback(c);
if !c.is_eof() && c.is_digit() {
return Self::tUMINUS_NUM;
}
return Self::tUMINUS;
}
self.lex_state.set(EXPR_BEG);
self.buffer.pushback(c);
return self.warn_balanced(
Self::tMINUS,
"-",
"unary operator",
c,
space_seen,
last_state,
);
}
Some(b'.') => {
let is_beg = self.lex_state.is_beg();
self.lex_state.set(EXPR_BEG);
c = self.nextc();
if c == b'.' {
c = self.nextc();
if c == b'.' {
if self.context.in_argdef() {
self.lex_state.set(EXPR_ENDARG);
return Self::tBDOT3;
}
if self.paren_nest == 0 && self.buffer.is_looking_at_eol() {
self.warn(DiagnosticMessage::TripleDotAtEol {}, self.current_loc());
} else if self.lpar_beg >= 0
&& self.lpar_beg + 1 == self.paren_nest
&& last_state.is_some(EXPR_LABEL)
{
return Self::tDOT3;
}
return if is_beg { Self::tBDOT3 } else { Self::tDOT3 };
}
self.buffer.pushback(c);
return if is_beg { Self::tBDOT2 } else { Self::tDOT2 };
}
self.buffer.pushback(c);
if !c.is_eof() && c.is_digit() {
let prev = if self.buffer.pcur - 1 > self.buffer.pbeg {
self.buffer.byte_at(self.buffer.pcur - 2)
} else {
MaybeByte::EndOfInput
};
self.parse_numeric(b'.');
if prev.is_digit() {
self.yyerror0(DiagnosticMessage::FractionAfterNumeric {});
} else {
self.yyerror0(DiagnosticMessage::NoDigitsAfterDot {});
}
self.lex_state.set(EXPR_END);
self.buffer.set_ptok(self.buffer.pcur);
continue 'retrying;
}
self.set_yylval_id(".");
self.lex_state.set(EXPR_DOT);
return Self::tDOT;
}
Some(c) if (b'0'..=b'9').contains(&c) => {
return self.parse_numeric(c);
}
Some(b')') => {
self.cond.pop();
self.cmdarg.pop();
self.lex_state.set(EXPR_ENDFN);
self.paren_nest -= 1;
return Self::tRPAREN;
}
Some(b']') => {
self.cond.pop();
self.cmdarg.pop();
self.lex_state.set(EXPR_END);
self.paren_nest -= 1;
return Self::tRBRACK;
}
Some(b'}') => {
if self.brace_nest == 0 {
self.brace_nest -= 1;
return Self::tSTRING_DEND;
}
self.brace_nest -= 1;
self.cond.pop();
self.cmdarg.pop();
self.lex_state.set(EXPR_END);
self.paren_nest -= 1;
return Self::tRCURLY;
}
Some(b':') => {
c = self.nextc();
if c == b':' {
if self.lex_state.is_beg()
|| self.lex_state.is_some(EXPR_CLASS)
|| self
.lex_state
.is_spacearg(MaybeByte::EndOfInput, space_seen)
{
self.lex_state.set(EXPR_BEG);
return Self::tCOLON3;
}
self.set_yylval_id("::");
self.lex_state.set(EXPR_DOT);
return Self::tCOLON2;
}
if self.lex_state.is_end() || c.is_space() || c == Some(b'#') {
self.buffer.pushback(c);
let result = self.warn_balanced(
Self::tCOLON,
":",
"symbol literal",
c,
space_seen,
last_state,
);
self.lex_state.set(EXPR_BEG);
return result;
}
match c.as_option() {
Some(c) if c == b'\'' => {
self.strterm = self.new_strterm(str_ssym, c, None, None)
}
Some(c) if c == b'"' => {
self.strterm = self.new_strterm(str_dsym, c, None, None)
}
_ => self.buffer.pushback(c),
}
self.lex_state.set(EXPR_FNAME);
return Self::tSYMBEG;
}
Some(b'/') => {
if self.lex_state.is_beg() {
self.strterm = self.new_strterm(str_regexp, b'/', None, None);
return Self::tREGEXP_BEG;
}
c = self.nextc();
if c == b'=' {
self.set_yylval_id("/=");
self.lex_state.set(EXPR_BEG);
return Self::tOP_ASGN;
}
self.buffer.pushback(c);
if self.lex_state.is_spacearg(c, space_seen) {
self.arg_ambiguous(b'/', self.current_loc());
self.strterm = self.new_strterm(str_regexp, b'/', None, None);
return Self::tREGEXP_BEG;
}
self.lex_state.set(if self.lex_state.is_after_operator() {
EXPR_ARG
} else {
EXPR_BEG
});
return self.warn_balanced(
Self::tDIVIDE,
"/",
"regexp literal",
c,
space_seen,
last_state,
);
}
Some(b'^') => {
c = self.nextc();
if c == b'=' {
self.set_yylval_id("^=");
self.lex_state.set(EXPR_BEG);
return Self::tOP_ASGN;
}
self.lex_state.set(if self.lex_state.is_after_operator() {
EXPR_ARG
} else {
EXPR_BEG
});
self.buffer.pushback(c);
return Self::tCARET;
}
Some(b';') => {
self.lex_state.set(EXPR_BEG);
self.command_start = true;
return Self::tSEMI;
}
Some(b',') => {
self.lex_state.set(EXPR_BEG | EXPR_LABEL);
return Self::tCOMMA;
}
Some(b'~') => {
if self.lex_state.is_after_operator() {
c = self.nextc();
if c != b'@' {
self.buffer.pushback(c);
}
self.lex_state.set(EXPR_ARG);
} else {
self.lex_state.set(EXPR_BEG);
}
return Self::tTILDE;
}
Some(b'(') => {
let mut result: i32 = Self::tLPAREN2;
if self.lex_state.is_beg() {
result = Self::tLPAREN;
} else if !space_seen {
} else if self.lex_state.is_arg()
|| self.lex_state.is_all(EXPR_END | EXPR_LABEL)
{
result = Self::tLPAREN_ARG;
} else if self.lex_state.is_some(EXPR_ENDFN) && !self.is_lambda_beginning() {
self.warn(
DiagnosticMessage::ParenthesesIterpretedAsArglist {},
self.current_loc(),
);
}
self.paren_nest += 1;
self.cond.push(false);
self.cmdarg.push(false);
self.lex_state.set(EXPR_BEG | EXPR_LABEL);
return result;
}
Some(b'[') => {
let mut result: i32 = Self::tLBRACK2;
self.paren_nest += 1;
if self.lex_state.is_after_operator() {
c = self.nextc();
if c == b']' {
self.paren_nest -= 1;
self.lex_state.set(EXPR_ARG);
c = self.nextc();
if c == b'=' {
return Self::tASET;
}
self.buffer.pushback(c);
return Self::tAREF;
}
self.buffer.pushback(c);
self.lex_state.set(EXPR_ARG | EXPR_LABEL);
return Self::tLBRACK2;
} else if self.lex_state.is_beg()
|| (self.lex_state.is_arg()
&& (space_seen || self.lex_state.is_some(EXPR_LABELED)))
{
result = Self::tLBRACK;
}
self.lex_state.set(EXPR_BEG | EXPR_LABEL);
self.cond.push(false);
self.cmdarg.push(false);
return result;
}
Some(b'{') => {
self.brace_nest += 1;
let result: i32;
if self.is_lambda_beginning() {
result = Self::tLAMBEG;
} else if self.lex_state.is_some(EXPR_LABELED) {
result = Self::tLBRACE;
} else if self.lex_state.is_some(EXPR_ARG_ANY | EXPR_END | EXPR_ENDFN) {
result = Self::tLCURLY;
} else if self.lex_state.is_some(EXPR_ENDARG) {
result = Self::tLBRACE_ARG;
} else {
result = Self::tLBRACE;
}
if result != Self::tLBRACE {
self.command_start = true;
self.lex_state.set(EXPR_BEG);
} else {
self.lex_state.set(EXPR_BEG | EXPR_LABEL);
}
self.paren_nest += 1;
self.cond.push(false);
self.cmdarg.push(false);
return result;
}
Some(b'\\') => {
c = self.nextc();
if c == b'\n' {
space_seen = true;
continue 'retrying;
}
if c == b' ' {
return Self::tSP;
}
if c.is_space() {
match c.as_option() {
Some(b'\t') => return Self::tSLASH_T,
Some(Self::LF_CHAR) => return Self::tSLASH_F,
Some(b'\r') => return Self::tSLASH_R,
Some(Self::VTAB_CHAR) => return Self::tVTAB,
Some(other) => unreachable!("unsupported space char {:?}", other),
None => {}
}
}
self.buffer.pushback(c);
return Self::tBACKSLASH;
}
Some(b'%') => {
return self.parse_percent(space_seen, last_state);
}
Some(b'$') => {
return self.parse_gvar(last_state);
}
Some(b'@') => {
return self.parse_atmark(last_state);
}
Some(b'_') => {
if self.buffer.was_bol() && self.buffer.is_whole_match(b"__END__", 0) {
self.buffer.eofp = true;
return Self::END_OF_INPUT;
}
self.newtok();
}
Some(c) => {
if !self.is_identchar() {
self.compile_error(
DiagnosticMessage::InvalidChar { c },
self.current_loc(),
);
self.token_flush();
continue 'retrying;
}
self.newtok();
}
}
break;
}
self.parse_ident(c, cmd_state)
}
fn normal_newline_leaf_label(&mut self) -> i32 {
self.command_start = true;
self.lex_state.set(EXPR_BEG);
Self::tNL
}
pub(crate) fn warn(&mut self, message: DiagnosticMessage, loc: Loc) {
println_if_debug_lexer!("WARNING: {}", message.render());
let diagnostic = Diagnostic {
level: ErrorLevel::Warning,
message,
loc,
};
self.diagnostics.emit(diagnostic);
}
pub(crate) fn warn_balanced(
&mut self,
token_type: i32,
op: &'static str,
syn: &'static str,
c: MaybeByte,
space_seen: bool,
last_state: LexState,
) -> i32 {
if !last_state.is_some(EXPR_CLASS | EXPR_DOT | EXPR_FNAME | EXPR_ENDFN)
&& space_seen & !c.is_space()
{
self.warn(
DiagnosticMessage::AmbiguousOperator {
operator: op.to_string(),
interpreted_as: syn.to_string(),
},
self.current_loc(),
);
}
token_type
}
pub(crate) fn compile_error(&mut self, message: DiagnosticMessage, loc: Loc) {
println_if_debug_lexer!("Compile error: {}", message.render());
let diagnostic = Diagnostic {
level: ErrorLevel::Error,
message,
loc,
};
self.diagnostics.emit(diagnostic);
}
pub(crate) fn new_strterm(
&self,
func: usize,
term: u8,
paren: Option<u8>,
heredoc_end: Option<HeredocEnd>,
) -> Option<Box<StrTerm>> {
Some(Box::new(StrTerm::new_literal(StringLiteral::new(
0,
func,
paren,
term,
heredoc_end,
))))
}
pub(crate) fn loc(&self, begin_pos: usize, end_pos: usize) -> Loc {
Loc {
begin: begin_pos,
end: end_pos,
}
}
pub(crate) fn current_loc(&self) -> Loc {
self.loc(self.buffer.ptok, self.buffer.pcur)
}
pub(crate) fn arg_ambiguous(&mut self, c: u8, loc: Loc) -> bool {
if c == b'/' {
self.warn(DiagnosticMessage::AmbiguousRegexp {}, loc);
} else {
self.warn(
DiagnosticMessage::AmbiguousFirstArgument { operator: c },
loc,
);
}
true
}
pub(crate) fn toklen(&self) -> usize {
self.tokenbuf.len()
}
pub(crate) fn tokfix(&self) {
}
pub(crate) fn yyerror0(&mut self, message: DiagnosticMessage) {
self.yyerror1(message, self.current_loc());
}
pub(crate) fn yyerror1(&mut self, message: DiagnosticMessage, loc: Loc) {
println_if_debug_lexer!("yyerror0: {}", message.render());
let diagnostic = Diagnostic {
level: ErrorLevel::Error,
message,
loc,
};
self.diagnostics.emit(diagnostic);
}
pub(crate) fn is_lambda_beginning(&self) -> bool {
self.lpar_beg == self.paren_nest
}
pub(crate) fn tokadd_ident(&mut self, mut c: MaybeByte) -> bool {
loop {
if self.tokadd_mbchar(c).is_err() {
return true;
}
c = self.nextc();
if !self.is_identchar() {
break;
}
}
self.buffer.pushback(c);
false
}
pub(crate) fn newtok(&mut self) {
self.buffer.tokidx = 0;
self.buffer.tokline = self.buffer.ruby_sourceline;
self.tokenbuf = TokenBuf::default();
}
pub(crate) fn literal_flush(&mut self, ptok: usize) {
self.buffer.set_ptok(ptok);
}
pub(crate) fn tokadd_mbchar(&mut self, c: MaybeByte) -> Result<(), ()> {
let mut len = match self.multibyte_char_len(self.buffer.pcur - 1) {
Some(len) => len,
None => return Err(()),
};
match c {
MaybeByte::EndOfInput => return Err(()),
_ => self.tokadd(c),
}
len -= 1;
self.buffer.pcur += len;
self.tokcopy(len);
Ok(())
}
fn _multibyte_char_len(&self, ptr: usize) -> Option<usize> {
let c1 = self.buffer.byte_at(ptr).as_option()?;
let len = if c1 & 0x80 == 0 {
1
} else if c1 & 0xE0 == 0xC0 {
2
} else if c1 & 0xF0 == 0xE0 {
3
} else if c1 & 0xF8 == 0xF0 {
4
} else {
return None;
};
let bytes = self.buffer.substr_at(ptr, ptr + len)?;
std::str::from_utf8(bytes).ok()?;
Some(len)
}
pub(crate) fn multibyte_char_len(&mut self, ptr: usize) -> Option<usize> {
let result = self._multibyte_char_len(ptr);
if result.is_none() {
self.yyerror0(DiagnosticMessage::InvalidMultibyteChar {});
}
result
}
pub(crate) fn is_label_suffix(&self, n: usize) -> bool {
self.buffer.peek_n(b':', n) && !self.buffer.peek_n(b':', n + 1)
}
pub(crate) fn is_lvar_defined(&self, name: &str) -> bool {
self.static_env.is_declared(name)
}
}