use crate::ns::*;
/// Tokenizer over the text of a compilation unit.
///
/// Keeps a shared handle to the compilation unit, which receives the
/// diagnostics and comments produced while scanning and whose text is sliced
/// to build token payloads, together with a character reader over that text.
pub struct Tokenizer<'input> {
/// Compilation unit being tokenized.
compilation_unit: Rc<CompilationUnit>,
/// Reader that tracks the current position in the source text.
characters: CharacterReader<'input>,
}
impl<'input> Tokenizer<'input> {
/// Creates a tokenizer for `compilation_unit`.
///
/// When `options.byte_range` is set, the reader is built with
/// `CharacterReader::from_offset` over the text cut at the range's second
/// component, with the range's first component passed as the offset.
/// Otherwise the reader covers the whole text.
pub fn new(compilation_unit: &'input Rc<CompilationUnit>, options: &ParserOptions) -> Self {
    let source: &'input str = compilation_unit.text();
    let characters: CharacterReader<'input> = match options.byte_range {
        Some(range) => CharacterReader::from_offset(&source[0..range.1], range.0),
        None => CharacterReader::from(source),
    };
    Self {
        compilation_unit: compilation_unit.clone(),
        characters,
    }
}
/// Returns the compilation unit this tokenizer scans.
pub fn compilation_unit(&self) -> &Rc<CompilationUnit> {
&self.compilation_unit
}
/// Returns the character reader, which reflects the current scan position.
pub fn characters(&self) -> &CharacterReader<'input> {
&self.characters
}
/// Adds a syntax error diagnostic to the compilation unit, unless the
/// unit's `prevent_equal_offset_error` check for `location` asks for it to
/// be suppressed.
fn add_syntax_error(&self, location: &Location, kind: DiagnosticKind, arguments: Vec<Rc<dyn DiagnosticArgument>>) {
    let suppressed = self.compilation_unit.prevent_equal_offset_error(location);
    if !suppressed {
        let diagnostic = Diagnostic::new_syntax_error(location, kind, arguments);
        self.compilation_unit.add_diagnostic(diagnostic);
    }
}
pub fn scan_ie_div(&mut self) -> (Token, Location) {
loop {
let ch = self.characters.peek_or_zero();
if CharacterValidator::is_whitespace(ch) {
self.characters.next();
} else if self.consume_line_terminator() || self.consume_comment() {
} else {
break;
}
}
if let Some(result) = self.scan_identifier() {
return result;
}
if let Some(result) = self.scan_dot_or_numeric_literal() {
return result;
}
if let Some(result) = self.scan_string_literal(false) {
return result;
}
let start = self.cursor_location();
match self.characters.peek_or_zero() {
',' => {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::Comma, location);
},
'(' => {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::ParenOpen, location);
},
')' => {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::ParenClose, location);
},
'[' => {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::SquareOpen, location);
},
']' => {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::SquareClose, location);
},
'{' => {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::BlockOpen, location);
},
'}' => {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::BlockClose, location);
},
':' => {
self.characters.next();
if self.characters.peek_or_zero() == ':' {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::ColonColon, location);
}
let location = start.combine_with(self.cursor_location());
return (Token::Colon, location);
},
'=' => {
self.characters.next();
let ch = self.characters.peek_or_zero();
if ch == '=' && self.characters.peek_at_or_zero(1) == '=' {
self.characters.skip_count_in_place(2);
let location = start.combine_with(self.cursor_location());
return (Token::StrictEquals, location);
}
if ch == '=' {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::Equals, location);
}
let location = start.combine_with(self.cursor_location());
return (Token::Assign, location);
},
'!' => {
self.characters.next();
let ch = self.characters.peek_or_zero();
if ch == '=' && self.characters.peek_at_or_zero(1) == '=' {
self.characters.skip_count_in_place(2);
let location = start.combine_with(self.cursor_location());
return (Token::StrictNotEquals, location);
}
if ch == '=' {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::NotEquals, location);
}
let location = start.combine_with(self.cursor_location());
return (Token::Exclamation, location);
},
'?' => {
self.characters.next();
let ch = self.characters.peek_or_zero();
if ch == '.' {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::OptionalChaining, location);
}
if ch == '?' && self.characters.peek_at_or_zero(1) == '=' {
self.characters.skip_count_in_place(2);
let location = start.combine_with(self.cursor_location());
return (Token::NullCoalescingAssign, location);
}
if ch == '?' {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::NullCoalescing, location);
}
let location = start.combine_with(self.cursor_location());
return (Token::Question, location);
},
';' => {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::Semicolon, location);
},
'<' => {
self.characters.next();
let ch = self.characters.peek_or_zero();
if ch == '=' {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::Le, location);
}
if ch == '<' && self.characters.peek_at_or_zero(1) == '=' {
self.characters.skip_count_in_place(2);
let location = start.combine_with(self.cursor_location());
return (Token::LeftShiftAssign, location);
}
if ch == '<' {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::LeftShift, location);
}
let location = start.combine_with(self.cursor_location());
return (Token::Lt, location);
},
'>' => {
self.characters.next();
let ch = self.characters.peek_or_zero();
if ch == '=' {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::Ge, location);
}
if ch == '>' && self.characters.peek_at_or_zero(1) == '=' {
self.characters.skip_count_in_place(2);
let location = start.combine_with(self.cursor_location());
return (Token::RightShiftAssign, location);
}
if ch == '>' && self.characters.peek_seq(3) == ">>=" {
self.characters.skip_count_in_place(3);
let location = start.combine_with(self.cursor_location());
return (Token::UnsignedRightShiftAssign, location);
}
if ch == '>' && self.characters.peek_at_or_zero(1) == '>' {
self.characters.skip_count_in_place(2);
let location = start.combine_with(self.cursor_location());
return (Token::UnsignedRightShift, location);
}
if ch == '>' {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::RightShift, location);
}
let location = start.combine_with(self.cursor_location());
return (Token::Gt, location);
},
'@' => {
self.characters.next();
if let Some(token) = self.scan_string_literal(true) {
return token;
}
let location = start.combine_with(self.cursor_location());
return (Token::Attribute, location);
},
'+' => {
self.characters.next();
let ch = self.characters.peek_or_zero();
if ch == '+' {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::Increment, location);
}
if ch == '=' {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::AddAssign, location);
}
let location = start.combine_with(self.cursor_location());
return (Token::Plus, location);
},
'-' => {
self.characters.next();
let ch = self.characters.peek_or_zero();
if ch == '-' {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::Decrement, location);
}
if ch == '=' {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::SubtractAssign, location);
}
let location = start.combine_with(self.cursor_location());
return (Token::Minus, location);
},
'*' => {
self.characters.next();
let ch = self.characters.peek_or_zero();
if ch == '*' && self.characters.peek_at_or_zero(1) == '=' {
self.characters.skip_count_in_place(2);
let location = start.combine_with(self.cursor_location());
return (Token::PowerAssign, location);
}
if ch == '*' {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::Power, location);
}
if ch == '=' {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::MultiplyAssign, location);
}
let location = start.combine_with(self.cursor_location());
return (Token::Times, location);
},
'/' => {
self.characters.next();
let ch = self.characters.peek_or_zero();
if ch == '=' {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::DivideAssign, location);
}
let location = start.combine_with(self.cursor_location());
return (Token::Div, location);
},
'%' => {
self.characters.next();
let ch = self.characters.peek_or_zero();
if ch == '=' {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::RemainderAssign, location);
}
let location = start.combine_with(self.cursor_location());
return (Token::Percent, location);
},
'&' => {
self.characters.next();
let ch = self.characters.peek_or_zero();
if ch == '&' && self.characters.peek_at_or_zero(1) == '=' {
self.characters.skip_count_in_place(2);
let location = start.combine_with(self.cursor_location());
return (Token::LogicalAndAssign, location);
}
if ch == '&' {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::LogicalAnd, location);
}
if ch == '=' {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::BitwiseAndAssign, location);
}
let location = start.combine_with(self.cursor_location());
return (Token::Ampersand, location);
},
'^' => {
self.characters.next();
let ch = self.characters.peek_or_zero();
if ch == '^' && self.characters.peek_at_or_zero(1) == '=' {
self.characters.skip_count_in_place(2);
let location = start.combine_with(self.cursor_location());
return (Token::LogicalXorAssign, location);
}
if ch == '^' {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::LogicalXor, location);
}
if ch == '=' {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::BitwiseXorAssign, location);
}
let location = start.combine_with(self.cursor_location());
return (Token::Hat, location);
},
'|' => {
self.characters.next();
let ch = self.characters.peek_or_zero();
if ch == '|' && self.characters.peek_at_or_zero(1) == '=' {
self.characters.skip_count_in_place(2);
let location = start.combine_with(self.cursor_location());
return (Token::LogicalOrAssign, location);
}
if ch == '|' {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::LogicalOr, location);
}
if ch == '=' {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::BitwiseOrAssign, location);
}
let location = start.combine_with(self.cursor_location());
return (Token::Pipe, location);
},
'~' => {
self.characters.next();
let location = start.combine_with(self.cursor_location());
return (Token::Tilde, location);
},
_ => {
if self.characters.has_remaining() {
self.add_unexpected_error();
self.characters.next();
return self.scan_ie_div();
} else {
return (Token::Eof, start)
}
},
}
}
/// Scans the rest of a regular expression literal up to its closing `/`,
/// followed by any flags.
///
/// `start` is the location at which the literal began and `body` holds the
/// body text already collected by the caller (presumably what followed the
/// opening slash when it was first lexed as a division token; confirm
/// against callers). Backslash escapes are copied verbatim into the body.
/// A line terminator inside the body is recorded as `'\n'`; one directly
/// after a backslash is reported as unexpected. Reaching the end of input
/// reports an error and ends the body.
pub fn scan_regexp_literal(&mut self, start: Location, mut body: String) -> (Token, Location) {
    loop {
        let ch = self.characters.peek_or_zero();
        match ch {
            '/' => {
                self.characters.next();
                break;
            },
            '\\' => {
                self.characters.next();
                body.push('\\');
                let escaped = self.characters.peek_or_zero();
                if self.characters.reached_end() {
                    self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingSlashForRegExp);
                    break;
                }
                if CharacterValidator::is_line_terminator(escaped) {
                    self.add_unexpected_error();
                    self.consume_line_terminator();
                } else {
                    self.characters.next();
                    body.push(escaped);
                }
            },
            _ if CharacterValidator::is_line_terminator(ch) => {
                body.push('\n');
                self.consume_line_terminator();
            },
            _ if self.characters.reached_end() => {
                self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingSlashForRegExp);
                break;
            },
            _ => {
                body.push(ch);
                self.characters.next();
            },
        }
    }

    // Flags are any identifier-part characters directly after the literal.
    let mut flags = String::new();
    while let Some((flag, _)) = self.consume_identifier_part() {
        flags.push(flag);
    }

    let location = start.combine_with(self.cursor_location());
    (Token::RegExp { body, flags }, location)
}
/// Returns the location spanning the next character, or the cursor
/// location when the end of input has been reached. The reader itself is
/// not advanced; a clone is used to find where the next character ends.
fn character_ahead_location(&self) -> Location {
    if self.characters.reached_end() {
        return self.cursor_location();
    }
    let mut lookahead = self.characters.clone();
    let first = lookahead.index();
    // Cannot fail: the end of input was ruled out above.
    lookahead.next().unwrap();
    let last = lookahead.index();
    Location::with_offsets(&self.compilation_unit, first, last)
}
/// Returns a location at the reader's current index.
pub fn cursor_location(&self) -> Location {
    Location::with_offset(&self.compilation_unit, self.characters.index())
}
/// Reports the next character as unexpected, or reports an unexpected end
/// when no input remains. Nothing is consumed.
fn add_unexpected_error(&self) {
    if !self.characters.has_remaining() {
        self.add_syntax_error(&self.cursor_location(), DiagnosticKind::UnexpectedEnd, vec![]);
        return;
    }
    let location = self.character_ahead_location();
    self.add_syntax_error(&location, DiagnosticKind::UnexpectedCharacter, diagarg![self.characters.peek_or_zero().to_string()]);
}
/// Reports a syntax error of the given `kind` at the current cursor
/// location, with no diagnostic arguments.
fn add_unexpected_eof_error(&self, kind: DiagnosticKind) {
self.add_syntax_error(&self.cursor_location(), kind, vec![]);
}
/// Consumes one line terminator if the reader is positioned at one and
/// returns whether anything was consumed. A CR immediately followed by LF
/// is consumed as a single terminator.
fn consume_line_terminator(&mut self) -> bool {
    let current = self.characters.peek_or_zero();
    let is_crlf = current == '\x0D' && self.characters.peek_at_or_zero(1) == '\x0A';
    if is_crlf {
        self.characters.skip_count_in_place(2);
        true
    } else if CharacterValidator::is_line_terminator(current) {
        self.characters.next();
        true
    } else {
        false
    }
}
/// Consumes a `//` or `/* */` comment at the cursor, records it on the
/// compilation unit and returns `true`. Returns `false` without consuming
/// anything when the cursor is not at a comment.
///
/// The recorded content excludes the opening delimiter and, for a
/// terminated multi-line comment, the closing `*/`. The line terminator
/// that ends a single-line comment is consumed but is not part of the
/// comment's location or content. An unterminated multi-line comment is
/// reported as a syntax error and its content runs to the end of input.
fn consume_comment(&mut self) -> bool {
    if self.characters.peek_or_zero() != '/' {
        return false;
    }
    match self.characters.peek_at_or_zero(1) {
        '/' => {
            let start = self.cursor_location();
            self.characters.skip_count_in_place(2);
            while !CharacterValidator::is_line_terminator(self.characters.peek_or_zero()) && self.characters.has_remaining() {
                self.characters.skip_in_place();
            }
            let location = start.combine_with(self.cursor_location());
            self.consume_line_terminator();
            let content = self.compilation_unit.text()[(location.first_offset() + 2)..location.last_offset()].to_owned();
            self.compilation_unit.add_comment(Rc::new(Comment {
                multiline: false,
                content: RefCell::new(content),
                location: RefCell::new(location),
            }));
            true
        },
        '*' => {
            let start = self.cursor_location();
            self.characters.skip_count_in_place(2);
            let mut terminated = true;
            loop {
                if self.characters.peek_or_zero() == '*' && self.characters.peek_at_or_zero(1) == '/' {
                    self.characters.skip_count_in_place(2);
                    break;
                } else if self.consume_line_terminator() {
                    // Already consumed by the condition.
                } else if self.characters.has_remaining() {
                    self.characters.skip_in_place();
                } else {
                    self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingSeqForMultiLineComment);
                    terminated = false;
                    break;
                }
            }
            let location = start.combine_with(self.cursor_location());
            let i = location.first_offset() + 2;
            // Only strip the two characters of `*/` when they were actually
            // consumed; otherwise the last two characters of an
            // unterminated comment would be dropped from its content.
            let j = if terminated {
                decrease_last_offset(i, location.last_offset(), 2)
            } else {
                location.last_offset()
            };
            let content = self.compilation_unit.text()[i..j].to_owned();
            self.compilation_unit.add_comment(Rc::new(Comment {
                multiline: true,
                content: RefCell::new(content),
                location: RefCell::new(location),
            }));
            true
        },
        _ => false,
    }
}
/// Scans an identifier or reserved word at the cursor.
///
/// Returns `None`, consuming nothing, when the cursor is not at an
/// identifier start. A name is only looked up as a reserved word when none
/// of its characters came from an escape sequence; otherwise it is always
/// returned as `Token::Identifier`.
fn scan_identifier(&mut self) -> Option<(Token, Location)> {
    let start = self.cursor_location();
    let (first, mut any_escape) = self.consume_identifier_start()?;
    let mut name = String::from(first);
    while let Some((part, from_escape)) = self.consume_identifier_part() {
        any_escape |= from_escape;
        name.push(part);
    }
    let location = start.combine_with(self.cursor_location());
    if !any_escape {
        if let Some(reserved) = As3ReservedWord::token(name.as_ref()) {
            return Some((reserved, location));
        }
    }
    Some((Token::Identifier(name), location))
}
/// Consumes one identifier-start character, either literal or written as a
/// backslash escape. Returns the character and whether it came from an
/// escape, or `None` when the cursor is at neither.
fn consume_identifier_start(&mut self) -> Option<(char, bool)> {
    match self.characters.peek_or_zero() {
        ch if CharacterValidator::is_identifier_start(ch) => {
            self.characters.next();
            Some((ch, false))
        },
        '\\' => {
            self.characters.next();
            Some((self.expect_unicode_escape_sequence(), true))
        },
        _ => None,
    }
}
/// Consumes one identifier-part character, either literal or written as a
/// backslash escape. Returns the character and whether it came from an
/// escape, or `None` when the cursor is at neither.
fn consume_identifier_part(&mut self) -> Option<(char, bool)> {
    match self.characters.peek_or_zero() {
        ch if CharacterValidator::is_identifier_part(ch) => {
            self.characters.next();
            Some((ch, false))
        },
        '\\' => {
            self.characters.next();
            Some((self.expect_unicode_escape_sequence(), true))
        },
        _ => None,
    }
}
/// Parses a Unicode escape with the cursor positioned at the `u` that
/// follows a backslash: either `uXXXX` (four hex digits) or `u{X...}`.
///
/// Returns the decoded character. On any malformed input or invalid code
/// point a syntax error is reported and `'_'` is returned instead.
fn expect_unicode_escape_sequence(&mut self) -> char {
    const FALLBACK: char = '\x5F';

    let start = self.cursor_location();
    if self.characters.peek_or_zero() != 'u' {
        self.add_unexpected_error();
        return FALLBACK;
    }
    self.characters.next();

    // Fixed-width form: four hex digits, most significant first.
    if CharacterValidator::is_hex_digit(self.characters.peek_or_zero()) {
        let mut code: u32 = 0;
        for _ in 0..4 {
            code = (code << 4) | self.expect_hex_digit();
        }
        return match char::from_u32(code) {
            Some(decoded) => decoded,
            None => {
                self.add_syntax_error(&start.combine_with(self.cursor_location()), DiagnosticKind::InvalidEscapeValue, vec![]);
                FALLBACK
            },
        };
    }

    // Braced form: `{`, any number of hex digits, `}`.
    if self.characters.peek_or_zero() != '{' {
        self.add_unexpected_error();
        return FALLBACK;
    }
    self.characters.next();
    while CharacterValidator::is_hex_digit(self.characters.peek_or_zero()) {
        self.characters.next();
    }
    if self.characters.peek_or_zero() != '}' {
        self.add_unexpected_error();
        return FALLBACK;
    }
    self.characters.next();

    let location = start.combine_with(self.cursor_location());
    // Digits sit between the `u{` prefix and the closing `}`.
    let digits = &self.compilation_unit.text()[(start.first_offset + 2)..(location.last_offset - 1)];
    let decoded = u32::from_str_radix(digits, 16).ok().and_then(char::from_u32);
    match decoded {
        Some(decoded) => decoded,
        None => {
            self.add_syntax_error(&location, DiagnosticKind::InvalidEscapeValue, vec![]);
            FALLBACK
        },
    }
}
/// Consumes one hex digit and returns its value. When the next character
/// is not a hex digit, reports it as unexpected, consumes nothing and
/// returns the placeholder `0x5F`.
fn expect_hex_digit(&mut self) -> u32 {
    let ch = self.characters.peek_or_zero();
    match CharacterValidator::hex_digit_mv(ch) {
        Some(value) => {
            self.characters.next();
            value
        },
        None => {
            self.add_unexpected_error();
            0x5F
        },
    }
}
/// Scans a token that starts with `.` or a decimal digit.
///
/// Produces `Token::Ellipsis` (`...`), `Token::Descendants` (`..`),
/// `Token::Dot`, or `Token::Number`. A `0x`/`0X` or `0b`/`0B` prefix is
/// delegated to the hex/binary scanners. Decimal literals may have a
/// fraction, an exponent, `_` separators between digits and an `f`/`F`
/// suffix; the suffix is not part of the number's text. Returns `None`,
/// consuming nothing, when the cursor is at neither `.` nor a digit.
fn scan_dot_or_numeric_literal(&mut self) -> Option<(Token, Location)> {
    let start = self.cursor_location();
    let first = self.characters.peek_or_zero();
    let starts_with_dot = first == '.';

    if starts_with_dot {
        self.characters.next();
        let ahead = self.characters.peek_seq(2);
        if ahead == ".." {
            self.characters.skip_count_in_place(2);
            return Some((Token::Ellipsis, start.combine_with(self.cursor_location())));
        }
        // First character of the lookahead if it is a single byte wide,
        // otherwise NUL.
        let following = ahead.get(..1).and_then(|s| s.chars().next()).unwrap_or('\x00');
        if following == '.' {
            self.characters.next();
            return Some((Token::Descendants, start.combine_with(self.cursor_location())));
        }
        if !CharacterValidator::is_dec_digit(following) {
            return Some((Token::Dot, start.combine_with(self.cursor_location())));
        }
        // Fraction digits of a literal such as `.5`.
        while CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
            self.characters.next();
            self.consume_underscore_followed_by_dec_digit();
        }
    } else if first == '0' {
        self.characters.next();
        match self.characters.peek_or_zero() {
            'X' | 'x' => {
                self.characters.next();
                return self.scan_hex_literal(start.clone());
            },
            'B' | 'b' => {
                self.characters.next();
                return self.scan_bin_literal(start.clone());
            },
            _ => {},
        }
    } else if CharacterValidator::is_dec_digit(first) {
        while CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
            self.characters.next();
            self.consume_underscore_followed_by_dec_digit();
        }
    } else {
        return None;
    }

    // Optional fraction (not allowed when the literal began with a dot).
    if !starts_with_dot && self.characters.peek_or_zero() == '.' {
        self.characters.next();
        while CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
            self.characters.next();
            self.consume_underscore_followed_by_dec_digit();
        }
    }

    // Optional exponent with an optional sign.
    if matches!(self.characters.peek_or_zero(), 'E' | 'e') {
        self.characters.next();
        if matches!(self.characters.peek_or_zero(), '+' | '-') {
            self.characters.next();
        }
        if !CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
            self.add_unexpected_error();
        }
        while CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
            self.characters.next();
            self.consume_underscore_followed_by_dec_digit();
        }
    }

    // The literal's text is captured before any suffix is consumed.
    let string = self.compilation_unit.text()[start.first_offset..self.characters.index()].to_owned();
    let suffix = if matches!(self.characters.peek_or_zero(), 'f' | 'F') {
        self.characters.next();
        NumberSuffix::F
    } else {
        NumberSuffix::None
    };
    self.unallow_numeric_suffix();
    let location = start.combine_with(self.cursor_location());
    Some((Token::Number(string, suffix), location))
}
/// Scans the digits of a hexadecimal literal whose prefix has already been
/// consumed; `start` is the location where the literal began. A missing
/// first digit is reported as an error. The token text is the source from
/// `start` to the cursor.
fn scan_hex_literal(&mut self, start: Location) -> Option<(Token, Location)> {
    if !CharacterValidator::is_hex_digit(self.characters.peek_or_zero()) {
        self.add_unexpected_error();
    }
    loop {
        if !CharacterValidator::is_hex_digit(self.characters.peek_or_zero()) {
            break;
        }
        self.characters.next();
        self.consume_underscore_followed_by_hex_digit();
    }
    self.unallow_numeric_suffix();
    let location = start.combine_with(self.cursor_location());
    let text = self.compilation_unit.text()[location.first_offset..location.last_offset].to_owned();
    let token = Token::Number(text, NumberSuffix::None);
    Some((token, location))
}
/// Scans the digits of a binary literal whose prefix has already been
/// consumed; `start` is the location where the literal began. A missing
/// first digit is reported as an error. The token text is the source from
/// `start` to the cursor.
fn scan_bin_literal(&mut self, start: Location) -> Option<(Token, Location)> {
    if !CharacterValidator::is_bin_digit(self.characters.peek_or_zero()) {
        self.add_unexpected_error();
    }
    loop {
        if !CharacterValidator::is_bin_digit(self.characters.peek_or_zero()) {
            break;
        }
        self.characters.next();
        self.consume_underscore_followed_by_bin_digit();
    }
    self.unallow_numeric_suffix();
    let location = start.combine_with(self.cursor_location());
    let text = self.compilation_unit.text()[location.first_offset..location.last_offset].to_owned();
    let token = Token::Number(text, NumberSuffix::None);
    Some((token, location))
}
/// If the next character is `_`, consumes it together with the character
/// after it. An error is reported when that following character is not a
/// decimal digit; it is consumed either way.
fn consume_underscore_followed_by_dec_digit(&mut self) {
    if self.characters.peek_or_zero() != '_' {
        return;
    }
    self.characters.next();
    let after_separator = self.characters.peek_or_zero();
    if !CharacterValidator::is_dec_digit(after_separator) {
        self.add_unexpected_error();
    }
    self.characters.next();
}
/// If the next character is `_`, consumes it together with the character
/// after it. An error is reported when that following character is not a
/// hex digit; it is consumed either way.
fn consume_underscore_followed_by_hex_digit(&mut self) {
    if self.characters.peek_or_zero() != '_' {
        return;
    }
    self.characters.next();
    let after_separator = self.characters.peek_or_zero();
    if !CharacterValidator::is_hex_digit(after_separator) {
        self.add_unexpected_error();
    }
    self.characters.next();
}
/// If the next character is `_`, consumes it together with the character
/// after it. An error is reported when that following character is not a
/// binary digit; it is consumed either way.
fn consume_underscore_followed_by_bin_digit(&mut self) {
    if self.characters.peek_or_zero() != '_' {
        return;
    }
    self.characters.next();
    let after_separator = self.characters.peek_or_zero();
    if !CharacterValidator::is_bin_digit(after_separator) {
        self.add_unexpected_error();
    }
    self.characters.next();
}
/// Reports an error when the character directly after a numeric literal
/// could start an identifier. Nothing is consumed.
fn unallow_numeric_suffix(&self) {
    let ahead = self.characters.peek_or_zero();
    if CharacterValidator::is_identifier_start(ahead) {
        self.add_unexpected_error();
    }
}
/// Scans a string literal delimited by `"` or `'` at the cursor.
///
/// Returns `None`, consuming nothing, when the cursor is not at a quote.
/// With `raw` set, escape sequences are not interpreted and the token's
/// location starts one offset before the quote (the caller has already
/// consumed a prefix character there). Three consecutive delimiters start
/// a triple-quoted literal, handled by `scan_triple_string_literal`.
/// A line terminator inside the literal is reported and skipped; reaching
/// the end of input is reported and ends the literal.
fn scan_string_literal(&mut self, raw: bool) -> Option<(Token, Location)> {
    let delim = self.characters.peek_or_zero();
    if delim != '"' && delim != '\'' {
        return None;
    }

    let at_quote = self.cursor_location();
    let start = if raw {
        let unit = at_quote.compilation_unit();
        Location::with_offset(&unit, at_quote.first_offset() - 1)
    } else {
        at_quote
    };

    self.characters.next();
    if self.characters.peek_or_zero() == delim && self.characters.peek_at_or_zero(1) == delim {
        self.characters.skip_count_in_place(2);
        return self.scan_triple_string_literal(delim, start, raw);
    }

    let mut value = String::new();
    loop {
        // Escapes are only interpreted in non-raw literals.
        if !raw {
            if let Some(escape) = self.consume_escape_sequence() {
                value.push_str(&escape);
                continue;
            }
        }
        let ch = self.characters.peek_or_zero();
        if ch == delim {
            self.characters.next();
            break;
        }
        if CharacterValidator::is_line_terminator(ch) {
            self.add_syntax_error(&self.character_ahead_location(), DiagnosticKind::StringLiteralMustBeTerminatedBeforeLineBreak, vec![]);
            self.consume_line_terminator();
        } else if !self.characters.has_remaining() {
            self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingQuoteForString);
            break;
        } else {
            value.push(ch);
            self.characters.next();
        }
    }

    let location = start.combine_with(self.cursor_location());
    Some((Token::String(value), location))
}
/// Scans the body of a triple-quoted string literal whose three opening
/// delimiters have already been consumed.
///
/// The body is split into lines. An empty first line is dropped when more
/// lines follow. The indentation of the last line (the one holding the
/// closing delimiter) is the base indent: up to that much indentation is
/// stripped from every other line, and the last line is only kept if
/// something remains after stripping. Lines are joined with `'\n'`.
/// With `raw` set, escape sequences are not interpreted. Reaching the end
/// of input is reported and ends the literal.
fn scan_triple_string_literal(&mut self, delim: char, start: Location, raw: bool) -> Option<(Token, Location)> {
    let mut lines: Vec<String> = vec![];
    let mut builder = String::new();
    loop {
        // Escapes are only interpreted in non-raw literals.
        if !raw {
            if let Some(escape) = self.consume_escape_sequence() {
                builder.push_str(&escape);
                continue;
            }
        }
        let ch = self.characters.peek_or_zero();
        let at_closing = ch == delim
            && self.characters.peek_at_or_zero(1) == delim
            && self.characters.peek_at_or_zero(2) == delim;
        if at_closing {
            self.characters.skip_count_in_place(3);
            lines.push(std::mem::take(&mut builder));
            break;
        }
        if CharacterValidator::is_line_terminator(ch) {
            lines.push(std::mem::take(&mut builder));
            self.consume_line_terminator();
        } else if !self.characters.has_remaining() {
            self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingQuoteForString);
            lines.push(std::mem::take(&mut builder));
            break;
        } else {
            builder.push(ch);
            self.characters.next();
        }
    }

    let location = start.combine_with(self.cursor_location());

    if lines[0].is_empty() && lines.len() > 1 {
        lines.remove(0);
    }

    // Every exit from the loop pushes a line, so one is always present.
    let last_line = lines.pop().unwrap();
    let base_indent = CharacterValidator::indent_count(&last_line);

    let mut dedented: Vec<String> = Vec::with_capacity(lines.len() + 1);
    for line in &lines {
        let cut = usize::min(base_indent, CharacterValidator::indent_count(line));
        dedented.push(line[cut..].to_owned());
    }
    let tail = &last_line[base_indent..];
    if !tail.is_empty() {
        dedented.push(tail.to_owned());
    }

    Some((Token::String(dedented.join("\n")), location))
}
/// Consumes a backslash escape at the cursor and returns the text it
/// stands for, or `None` when the cursor is not at a backslash.
///
/// A backslash followed by a line terminator yields the empty string, as
/// does a backslash at the end of input (which is also reported). `\u` and
/// `\x` take hex digits; `\0` yields NUL and reports an error when a
/// decimal digit follows. Any other character stands for itself, with an
/// error reported when it is a decimal digit.
fn consume_escape_sequence(&mut self) -> Option<String> {
    if self.characters.peek_or_zero() != '\\' {
        return None;
    }
    self.characters.next();
    if !self.characters.has_remaining() {
        self.add_unexpected_error();
        return Some(String::new());
    }
    if self.consume_line_terminator() {
        return Some(String::new());
    }

    let ch = self.characters.peek_or_zero();

    // Single-character escapes with a fixed replacement.
    let fixed = match ch {
        '\'' | '"' | '\\' => Some(ch),
        'b' => Some('\x08'),
        'f' => Some('\x0C'),
        'n' => Some('\x0A'),
        'r' => Some('\x0D'),
        't' => Some('\x09'),
        'v' => Some('\x0B'),
        _ => None,
    };
    if let Some(replacement) = fixed {
        self.characters.next();
        return Some(String::from(replacement));
    }

    let decoded = match ch {
        // The callee consumes the `u` itself.
        'u' => self.expect_unicode_escape_sequence(),
        'x' => {
            self.characters.next();
            let code = (self.expect_hex_digit() << 4) | self.expect_hex_digit();
            char::from_u32(code).unwrap()
        },
        '0' => {
            self.characters.next();
            if CharacterValidator::is_dec_digit(self.characters.peek_or_zero()) {
                self.add_unexpected_error();
            }
            '\x00'
        },
        other => {
            if CharacterValidator::is_dec_digit(other) {
                self.add_unexpected_error();
            }
            self.characters.next();
            other
        },
    };
    Some(String::from(decoded))
}
/// Scans the next token inside an XML tag.
///
/// Produces `Token::XmlName`, `Token::XmlWhitespace`, `Token::Assign`
/// (`=`), `Token::Gt` (`>`), `Token::XmlSlashGt` (`/>`),
/// `Token::XmlAttributeValue` (quoted, without the quotes) or
/// `Token::BlockOpen` (`{`). A `/` not followed by `>` is reported but
/// still yields `Token::XmlSlashGt`. An unterminated attribute value is
/// reported and runs to the end of input. Any other character is reported
/// as unexpected and skipped. Returns `Token::Eof` at the end of input.
pub fn scan_ie_xml_tag(&mut self) -> (Token, Location) {
    // Unexpected characters are skipped by iterating instead of recursing,
    // so a long run of invalid input cannot grow the call stack.
    loop {
        let start = self.cursor_location();
        let ch = self.characters.peek_or_zero();

        if CharacterValidator::is_xml_name_start(ch) {
            self.characters.next();
            while CharacterValidator::is_xml_name_part(self.characters.peek_or_zero()) {
                self.characters.next();
            }
            let location = start.combine_with(self.cursor_location());
            let name = self.compilation_unit.text()[location.first_offset..location.last_offset].to_owned();
            return (Token::XmlName(name), location);
        }

        if CharacterValidator::is_xml_whitespace(ch) {
            while CharacterValidator::is_xml_whitespace(self.characters.peek_or_zero()) {
                if !self.consume_line_terminator() {
                    self.characters.next();
                }
            }
            let location = start.combine_with(self.cursor_location());
            return (Token::XmlWhitespace, location);
        }

        let token = match ch {
            '=' => {
                self.characters.next();
                Token::Assign
            },
            '>' => {
                self.characters.next();
                Token::Gt
            },
            '/' => {
                self.characters.next();
                if self.characters.peek_or_zero() != '>' {
                    self.add_unexpected_error();
                } else {
                    self.characters.next();
                }
                Token::XmlSlashGt
            },
            '"' | '\'' => {
                let delim = ch;
                self.characters.next();
                while self.characters.peek_or_zero() != delim && self.characters.has_remaining() {
                    if !self.consume_line_terminator() {
                        self.characters.next();
                    }
                }
                if self.characters.reached_end() {
                    self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingQuoteForAttributeValue);
                }
                // The value excludes the opening quote and stops at the
                // cursor, which sits on the closing quote (or at the end).
                let value = self.compilation_unit.text()[(start.first_offset + 1)..self.cursor_location().first_offset].to_owned();
                if !self.characters.reached_end() {
                    self.characters.next();
                }
                Token::XmlAttributeValue(value)
            },
            '{' => {
                self.characters.next();
                Token::BlockOpen
            },
            _ => {
                if self.characters.reached_end() {
                    return (Token::Eof, self.cursor_location());
                }
                // Report the stray character, drop it and try again.
                self.add_unexpected_error();
                self.characters.next();
                continue;
            },
        };
        return (token, start.combine_with(self.cursor_location()));
    }
}
/// Scans the next token in XML element content.
///
/// At `<`, produces an `XmlMarkup` token when markup follows (see
/// `scan_xml_markup`), else `Token::XmlLtSlash` for `</` or `Token::Lt`.
/// At `{`, produces `Token::BlockOpen`. Otherwise produces `Token::XmlText`
/// covering everything up to the next `<`, `{` or the end of input.
/// Returns `Token::Eof` at the end of input.
pub fn scan_ie_xml_content(&mut self) -> (Token, Location) {
    let start = self.cursor_location();
    let first = self.characters.peek_or_zero();

    if first == '<' {
        self.characters.next();
        if let Some(markup) = self.scan_xml_markup(start.clone()) {
            return markup;
        }
        let token = if self.characters.peek_or_zero() == '/' {
            self.characters.next();
            Token::XmlLtSlash
        } else {
            Token::Lt
        };
        return (token, start.combine_with(self.cursor_location()));
    }

    if first == '{' {
        self.characters.next();
        return (Token::BlockOpen, start.combine_with(self.cursor_location()));
    }

    if self.characters.reached_end() {
        return (Token::Eof, self.cursor_location());
    }

    // Plain text: advance until a tag, an expression block or the end.
    loop {
        let ch = self.characters.peek_or_zero();
        if ch == '<' || ch == '{' {
            break;
        }
        if CharacterValidator::is_line_terminator(ch) {
            self.consume_line_terminator();
        } else if self.characters.has_remaining() {
            self.characters.next();
        } else {
            break;
        }
    }
    let location = start.combine_with(self.cursor_location());
    let text = self.compilation_unit.text()[location.first_offset..location.last_offset].to_owned();
    (Token::XmlText(text), location)
}
/// Scans an XML comment (`!-- ... -->`), CDATA section
/// (`![CDATA[ ... ]]>`) or processing instruction (`? ... ?>`) at the
/// cursor.
///
/// As called from `scan_ie_xml_content`, the cursor is just past the `<`
/// and `start` is the location of that `<`. The returned `XmlMarkup` token
/// holds the source text from `start` to the end of the markup. Returns
/// `None`, consuming nothing, when none of the three forms is present.
/// A missing closing sequence is reported and the markup runs to the end
/// of input.
pub fn scan_xml_markup(&mut self, start: Location) -> Option<(Token, Location)> {
    if self.characters.peek_seq(3) == "!--" {
        // XML comment.
        self.characters.skip_count_in_place(3);
        loop {
            let ahead = self.characters.peek_or_zero();
            if ahead == '-' && self.characters.peek_seq(3) == "-->" {
                self.characters.skip_count_in_place(3);
                break;
            }
            if CharacterValidator::is_line_terminator(ahead) {
                self.consume_line_terminator();
            } else if self.characters.reached_end() {
                self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingSeqForXmlComment);
                break;
            } else {
                self.characters.next();
            }
        }
    } else if self.characters.peek_seq(8) == "![CDATA[" {
        // CDATA section.
        self.characters.skip_count_in_place(8);
        loop {
            let ahead = self.characters.peek_or_zero();
            if ahead == ']' && self.characters.peek_seq(3) == "]]>" {
                self.characters.skip_count_in_place(3);
                break;
            }
            if CharacterValidator::is_line_terminator(ahead) {
                self.consume_line_terminator();
            } else if self.characters.reached_end() {
                self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingSeqForCData);
                break;
            } else {
                self.characters.next();
            }
        }
    } else if self.characters.peek_or_zero() == '?' {
        // Processing instruction.
        self.characters.next();
        loop {
            let ahead = self.characters.peek_or_zero();
            if ahead == '?' && self.characters.peek_at_or_zero(1) == '>' {
                self.characters.skip_count_in_place(2);
                break;
            }
            if CharacterValidator::is_line_terminator(ahead) {
                self.consume_line_terminator();
            } else if self.characters.reached_end() {
                self.add_unexpected_eof_error(DiagnosticKind::InputEndedBeforeReachingClosingSeqForPi);
                break;
            } else {
                self.characters.next();
            }
        }
    } else {
        return None;
    }

    // All three forms produce the same kind of token over the whole span.
    let location = start.combine_with(self.cursor_location());
    let content = self.compilation_unit.text()[location.first_offset..location.last_offset].to_owned();
    Some((Token::XmlMarkup(content), location))
}
}