#include "frontend/TokenStream.h"
#include "mozilla/ArrayUtils.h"
#include "mozilla/Attributes.h"
#include "mozilla/IntegerTypeTraits.h"
#include "mozilla/Likely.h"
#include "mozilla/Maybe.h"
#include "mozilla/MemoryChecking.h"
#include "mozilla/ScopeExit.h"
#include "mozilla/Span.h"
#include "mozilla/TextUtils.h"
#include "mozilla/Utf8.h"
#include <algorithm>
#include <ctype.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <utility>
#include "jsexn.h"
#include "jsnum.h"
#include "frontend/BytecodeCompiler.h"
#include "frontend/Parser.h"
#include "frontend/ReservedWords.h"
#include "js/CharacterEncoding.h"
#include "js/UniquePtr.h"
#include "util/StringBuffer.h"
#include "util/Unicode.h"
#include "vm/HelperThreads.h"
#include "vm/JSAtom.h"
#include "vm/JSContext.h"
#include "vm/Realm.h"
using mozilla::ArrayLength;
using mozilla::AssertedCast;
using mozilla::DecodeOneUtf8CodePoint;
using mozilla::IsAscii;
using mozilla::IsAsciiAlpha;
using mozilla::IsAsciiDigit;
using mozilla::IsTrailingUnit;
using mozilla::MakeScopeExit;
using mozilla::MakeSpan;
using mozilla::Maybe;
using mozilla::PointerRangeSize;
using mozilla::Utf8Unit;
using JS::ReadOnlyCompileOptions;
// One entry of the reserved-word table: the spelling of a reserved word and
// the token kind it is associated with.
struct ReservedWordInfo {
  // Pointer to the characters spelling the reserved word.
  const char* chars;

  // Token kind recorded for this reserved word.
  js::frontend::TokenKind tokentype;
};
// Table with one ReservedWordInfo entry per word listed by
// FOR_EACH_JAVASCRIPT_RESERVED_WORD.  Each entry pairs the |js_<word>_str|
// spelling with the |js::frontend::<type>| token kind named by the list.
// Entries appear in the order in which the list macro expands them.
static const ReservedWordInfo reservedWords[] = {
#define RESERVED_WORD_INFO(word, name, type) \
{js_##word##_str, js::frontend::type},
FOR_EACH_JAVASCRIPT_RESERVED_WORD(RESERVED_WORD_INFO)
#undef RESERVED_WORD_INFO
};
// Looks up the |length| code units starting at |s| in |reservedWords|.
// Returns a pointer to the matching table entry, or nullptr when the text is
// not a reserved word.  |length| must be nonzero.
//
// The dispatch logic comes from the included
// frontend/ReservedWordsGenerated.h, which is parameterized by the JSRW_*
// macros defined below.  NOTE(review): that header is presumably generated
// so that every path ends in one of the three labels -- confirm against the
// generator.
template <typename CharT>
static const ReservedWordInfo* FindReservedWord(const CharT* s, size_t length) {
MOZ_ASSERT(length != 0);
size_t i;
const ReservedWordInfo* rw;
const char* chars;
// Hooks used by the generated code: how to read the input, and which label
// to jump to for a definite match, a candidate that still needs checking, or
// a miss.
#define JSRW_LENGTH() length
#define JSRW_AT(column) s[column]
#define JSRW_GOT_MATCH(index) \
i = (index); \
goto got_match;
#define JSRW_TEST_GUESS(index) \
i = (index); \
goto test_guess;
#define JSRW_NO_MATCH() goto no_match;
#include "frontend/ReservedWordsGenerated.h"
#undef JSRW_NO_MATCH
#undef JSRW_TEST_GUESS
#undef JSRW_GOT_MATCH
#undef JSRW_AT
#undef JSRW_LENGTH
// Definite match: entry |i| is the answer with no further comparison.
got_match:
return &reservedWords[i];
// Candidate match: compare all |length| input units against entry |i|'s
// characters and reject on the first difference.
test_guess:
rw = &reservedWords[i];
chars = rw->chars;
do {
if (*s++ != static_cast<unsigned char>(*chars++)) {
goto no_match;
}
} while (--length != 0);
return rw;
no_match:
return nullptr;
}
// UTF-8 specialization: view the units as unsigned chars and reuse the
// generic lookup on that view.
template <>
MOZ_ALWAYS_INLINE const ReservedWordInfo* FindReservedWord<Utf8Unit>(
    const Utf8Unit* units, size_t length) {
  const auto* bytes = Utf8AsUnsignedChars(units);
  return FindReservedWord(bytes, length);
}
// Looks up the contents of |str| in the reserved-word table.
//
// |*visibility| is always written: Private when the string is non-empty and
// begins with '#', Public otherwise.  For names beginning with '#', nullptr
// is returned without consulting the table; otherwise the result of the
// character-based lookup is returned.
static const ReservedWordInfo* FindReservedWord(
    JSLinearString* str, js::frontend::NameVisibility* visibility) {
  using js::frontend::NameVisibility;

  JS::AutoCheckCannotGC nogc;
  const size_t length = str->length();

  // The same steps apply to both character widths, so express them once.
  auto lookup = [visibility,
                 length](const auto* chars) -> const ReservedWordInfo* {
    if (length > 0 && chars[0] == '#') {
      *visibility = NameVisibility::Private;
      return nullptr;
    }

    *visibility = NameVisibility::Public;
    return FindReservedWord(chars, length);
  };

  if (str->hasLatin1Chars()) {
    return lookup(str->latin1Chars(nogc));
  }
  return lookup(str->twoByteChars(nogc));
}
static uint32_t GetSingleCodePoint(const char16_t** p, const char16_t* end) {
using namespace js;
uint32_t codePoint;
if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(**p)) && *p + 1 < end) {
char16_t lead = **p;
char16_t maybeTrail = *(*p + 1);
if (unicode::IsTrailSurrogate(maybeTrail)) {
*p += 2;
return unicode::UTF16Decode(lead, maybeTrail);
}
}
codePoint = **p;
(*p)++;
return codePoint;
}
namespace js {
namespace frontend {
// Returns whether the contents of |str| pass the character-based
// IsIdentifier check, dispatching on the string's character width.
bool IsIdentifier(JSLinearString* str) {
  JS::AutoCheckCannotGC nogc;
  MOZ_ASSERT(str);

  const size_t length = str->length();
  return str->hasLatin1Chars()
             ? IsIdentifier(str->latin1Chars(nogc), length)
             : IsIdentifier(str->twoByteChars(nogc), length);
}
// Returns whether the contents of |str| pass the character-based
// IsIdentifierNameOrPrivateName check, dispatching on the string's character
// width.
bool IsIdentifierNameOrPrivateName(JSLinearString* str) {
  JS::AutoCheckCannotGC nogc;
  MOZ_ASSERT(str);

  const size_t length = str->length();
  return str->hasLatin1Chars()
             ? IsIdentifierNameOrPrivateName(str->latin1Chars(nogc), length)
             : IsIdentifierNameOrPrivateName(str->twoByteChars(nogc), length);
}
// Returns true when the Latin-1 text |chars[0..length)| is non-empty, its
// first character satisfies unicode::IsIdentifierStart, and every later
// character satisfies unicode::IsIdentifierPart.
bool IsIdentifier(const Latin1Char* chars, size_t length) {
  if (length == 0 || !unicode::IsIdentifierStart(char16_t(chars[0]))) {
    return false;
  }

  for (size_t i = 1; i < length; i++) {
    if (!unicode::IsIdentifierPart(char16_t(chars[i]))) {
      return false;
    }
  }

  return true;
}
// Like IsIdentifier for Latin-1 text, but a single leading '#' is skipped
// before the identifier check.  Empty input, and therefore also a lone "#",
// yields false.
bool IsIdentifierNameOrPrivateName(const Latin1Char* chars, size_t length) {
  if (length == 0) {
    return false;
  }

  // Number of leading characters to skip: one when the text starts with '#'.
  const size_t skip = (char16_t(chars[0]) == '#') ? 1 : 0;
  return IsIdentifier(chars + skip, length - skip);
}
// Returns true when the UTF-16 text |chars[0..length)| is non-empty, its
// first code point satisfies unicode::IsIdentifierStart, and every later code
// point satisfies unicode::IsIdentifierPart.  Code points are read with
// GetSingleCodePoint, so valid surrogate pairs count as one code point.
bool IsIdentifier(const char16_t* chars, size_t length) {
  if (length == 0) {
    return false;
  }

  const char16_t* cursor = chars;
  const char16_t* const limit = chars + length;

  const uint32_t first = GetSingleCodePoint(&cursor, limit);
  if (!unicode::IsIdentifierStart(first)) {
    return false;
  }

  while (cursor < limit) {
    const uint32_t next = GetSingleCodePoint(&cursor, limit);
    if (!unicode::IsIdentifierPart(next)) {
      return false;
    }
  }

  return true;
}
// UTF-16 counterpart of the Latin-1 IsIdentifierNameOrPrivateName: accepts an
// identifier optionally preceded by a single '#'.  Empty input and a lone
// "#" are rejected.
bool IsIdentifierNameOrPrivateName(const char16_t* chars, size_t length) {
  if (length == 0) {
    return false;
  }

  const char16_t* cursor = chars;
  const char16_t* const limit = chars + length;

  // The code point that must be an identifier start: the first one, or the
  // second when the first is '#'.
  uint32_t start = GetSingleCodePoint(&cursor, limit);
  if (start == '#') {
    if (length == 1) {
      return false;
    }
    start = GetSingleCodePoint(&cursor, limit);
  }

  if (!unicode::IsIdentifierStart(start)) {
    return false;
  }

  while (cursor < limit) {
    const uint32_t next = GetSingleCodePoint(&cursor, limit);
    if (!unicode::IsIdentifierPart(next)) {
      return false;
    }
  }

  return true;
}
// Returns true when |str| is found in the reserved-word table and
// TokenKindIsKeyword accepts the token kind recorded for it.
bool IsKeyword(JSLinearString* str) {
  NameVisibility visibility;
  const ReservedWordInfo* rw = FindReservedWord(str, &visibility);
  return rw != nullptr && TokenKindIsKeyword(rw->tokentype);
}
// Maps |str| to a token kind: the reserved word's kind when |str| is in the
// reserved-word table, TokenKind::PrivateName when the lookup reported a
// private name, and TokenKind::Name otherwise.
TokenKind ReservedWordTokenKind(PropertyName* str) {
  NameVisibility visibility;
  const ReservedWordInfo* rw = FindReservedWord(str, &visibility);
  if (rw) {
    return rw->tokentype;
  }

  if (visibility == NameVisibility::Private) {
    return TokenKind::PrivateName;
  }
  return TokenKind::Name;
}
// Returns the spelling associated with |str| when it is a reserved word (via
// the TokenKind overload of ReservedWordToCharZ), or nullptr when it is not.
const char* ReservedWordToCharZ(PropertyName* str) {
  NameVisibility visibility;
  const ReservedWordInfo* rw = FindReservedWord(str, &visibility);
  if (!rw) {
    return nullptr;
  }
  return ReservedWordToCharZ(rw->tokentype);
}
// Returns the |js_<word>_str| spelling for the reserved word whose token kind
// is |tt|.  |tt| must not be TokenKind::Name.  A kind not listed by
// FOR_EACH_JAVASCRIPT_RESERVED_WORD trips the unreachable assertion and
// falls through to return nullptr.
const char* ReservedWordToCharZ(TokenKind tt) {
MOZ_ASSERT(tt != TokenKind::Name);
switch (tt) {
// One |case| per reserved word, generated from the shared word list.
#define EMIT_CASE(word, name, type) \
case type: \
return js_##word##_str;
FOR_EACH_JAVASCRIPT_RESERVED_WORD(EMIT_CASE)
#undef EMIT_CASE
default:
MOZ_ASSERT_UNREACHABLE("Not a reserved word PropertyName.");
}
return nullptr;
}
// Returns the |cx->names()| entry for the reserved word whose token kind is
// |tt|.  |tt| must not be TokenKind::Name.  A kind not listed by
// FOR_EACH_JAVASCRIPT_RESERVED_WORD trips the unreachable assertion and
// falls through to return nullptr.
PropertyName* TokenStreamAnyChars::reservedWordToPropertyName(
TokenKind tt) const {
MOZ_ASSERT(tt != TokenKind::Name);
switch (tt) {
// One |case| per reserved word, generated from the shared word list.
#define EMIT_CASE(word, name, type) \
case type: \
return cx->names().name;
FOR_EACH_JAVASCRIPT_RESERVED_WORD(EMIT_CASE)
#undef EMIT_CASE
default:
MOZ_ASSERT_UNREACHABLE("Not a reserved word TokenKind.");
}
return nullptr;
}
// Initializes the line-start table with two entries: the offset at which the
// first line starts (|initialOffset|) followed by the MAX_PTR sentinel.
TokenStreamAnyChars::SourceCoords::SourceCoords(JSContext* cx,
uint32_t initialLineNumber,
uint32_t initialOffset)
: lineStartOffsets_(cx), initialLineNum_(initialLineNumber), lastIndex_(0) {
// NOTE(review): MAX_PTR is copied into a local before being appended,
// presumably so the constant itself is never bound to a reference; confirm
// before "simplifying" this away.
uint32_t maxPtr = MAX_PTR;
// The vector is asserted to already have room for two elements, so the
// reserve and the two infallible appends below are expected not to fail.
MOZ_ASSERT(lineStartOffsets_.capacity() >= 2);
MOZ_ALWAYS_TRUE(lineStartOffsets_.reserve(2));
lineStartOffsets_.infallibleAppend(initialOffset);
lineStartOffsets_.infallibleAppend(maxPtr);
}
// Records that line |lineNum| starts at |lineStartOffset|.
//
// When the line maps onto the sentinel slot it is new: a fresh sentinel is
// appended and the old sentinel slot is overwritten with the offset.
// Otherwise nothing is stored; the existing entry is only sanity-checked.
// Returns false only when the append fails.
MOZ_ALWAYS_INLINE bool TokenStreamAnyChars::SourceCoords::add(
    uint32_t lineNum, uint32_t lineStartOffset) {
  const uint32_t index = indexFromLineNumber(lineNum);
  const uint32_t sentinelIndex = lineStartOffsets_.length() - 1;

  MOZ_ASSERT(lineStartOffsets_[0] <= lineStartOffset);
  MOZ_ASSERT(lineStartOffsets_[sentinelIndex] == MAX_PTR);

  if (index != sentinelIndex) {
    // Not a new line: just check that a known line start hasn't changed.
    MOZ_ASSERT_IF(index < sentinelIndex,
                  lineStartOffsets_[index] == lineStartOffset);
    return true;
  }

  static_assert(mozilla::IsSame<decltype(lineStartOffsets_.allocPolicy()),
                                TempAllocPolicy&>::value,
                "this function's caller depends on it reporting an "
                "error on failure, as TempAllocPolicy ensures");

  // Grow first, so the sentinel is preserved if the append fails.
  uint32_t maxPtr = MAX_PTR;
  if (!lineStartOffsets_.append(maxPtr)) {
    return false;
  }

  lineStartOffsets_[index] = lineStartOffset;
  return true;
}
// Extends this line-start table with any entries |other| has beyond it.
// Both tables must begin at the same offset and end with the sentinel.
// Returns false if an append fails, true otherwise (including when there is
// nothing to copy).
MOZ_ALWAYS_INLINE bool TokenStreamAnyChars::SourceCoords::fill(
    const TokenStreamAnyChars::SourceCoords& other) {
  MOZ_ASSERT(lineStartOffsets_[0] == other.lineStartOffsets_[0]);
  MOZ_ASSERT(lineStartOffsets_.back() == MAX_PTR);
  MOZ_ASSERT(other.lineStartOffsets_.back() == MAX_PTR);

  const size_t otherLength = other.lineStartOffsets_.length();
  if (lineStartOffsets_.length() >= otherLength) {
    return true;
  }

  // Replace our sentinel with the other table's real entry at that position,
  // then append everything after it (ending with the other's sentinel).
  uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
  lineStartOffsets_[sentinelIndex] = other.lineStartOffsets_[sentinelIndex];

  size_t next = sentinelIndex + 1;
  while (next < otherLength) {
    if (!lineStartOffsets_.append(other.lineStartOffsets_[next])) {
      return false;
    }
    next++;
  }

  return true;
}
// Returns the index into |lineStartOffsets_| of the line containing
// |offset|, i.e. the index i with
// lineStartOffsets_[i] <= offset < lineStartOffsets_[i + 1].
//
// |lastIndex_| remembers the previous answer.  It is updated here even though
// the method is const.
MOZ_ALWAYS_INLINE uint32_t
TokenStreamAnyChars::SourceCoords::indexFromOffset(uint32_t offset) const {
uint32_t iMin, iMax, iMid;
if (lineStartOffsets_[lastIndex_] <= offset) {
// |offset| is on the remembered line or a later one.  Try the remembered
// line and then the next two before resorting to a search.
if (offset < lineStartOffsets_[lastIndex_ + 1]) {
return lastIndex_; }
lastIndex_++;
if (offset < lineStartOffsets_[lastIndex_ + 1]) {
return lastIndex_; }
lastIndex_++;
if (offset < lineStartOffsets_[lastIndex_ + 1]) {
return lastIndex_; }
// None matched: search only the lines after the ones just tried.  The
// assertion excludes the final (sentinel) entry.
iMin = lastIndex_ + 1;
MOZ_ASSERT(iMin <
lineStartOffsets_.length() - 1);
} else {
// |offset| precedes the remembered line: search from the beginning.
iMin = 0;
}
// Binary search over [iMin, iMax].  |length() - 2| is the last real line;
// |length() - 1| is the sentinel.
iMax = lineStartOffsets_.length() - 2;
while (iMax > iMin) {
iMid = iMin + (iMax - iMin) / 2;
if (offset >= lineStartOffsets_[iMid + 1]) {
iMin = iMid + 1; } else {
iMax = iMid; }
}
MOZ_ASSERT(iMax == iMin);
MOZ_ASSERT(lineStartOffsets_[iMin] <= offset);
MOZ_ASSERT(offset < lineStartOffsets_[iMin + 1]);
lastIndex_ = iMin;
return iMin;
}
// Builds a LineToken from the line index containing |offset| and |offset|
// itself.
TokenStreamAnyChars::SourceCoords::LineToken
TokenStreamAnyChars::SourceCoords::lineToken(uint32_t offset) const {
  const uint32_t lineIndex = indexFromOffset(offset);
  return LineToken(lineIndex, offset);
}
// Sets up the character-type-independent token stream state from |options|
// and marks the token kinds for which |isExprEnding| is true.
TokenStreamAnyChars::TokenStreamAnyChars(JSContext* cx,
                                         const ReadOnlyCompileOptions& options,
                                         StrictModeGetter* smg)
    : srcCoords(cx, options.lineno, options.scriptSourceOffset),
      options_(options),
      tokens(),
      cursor_(0),
      lookahead(),
      lineno(options.lineno),
      flags(),
      linebase(0),
      prevLinebase(size_t(-1)),
      filename_(options.filename()),
      displayURL_(nullptr),
      sourceMapURL_(nullptr),
      cx(cx),
      mutedErrors(options.mutedErrors()),
      strictModeGetter(smg) {
  // Only these entries are set here; all other entries are left untouched.
  const TokenKind exprEndingKinds[] = {
      TokenKind::Comma,      TokenKind::Semi,         TokenKind::Colon,
      TokenKind::RightParen, TokenKind::RightBracket, TokenKind::RightCurly,
  };
  for (TokenKind kind : exprEndingKinds) {
    isExprEnding[size_t(kind)] = true;
  }
}
// Constructs the shared base from |cx| and initializes |sourceUnits| with the
// source text |units[0..length)| and |startOffset|.  No other work is done.
template <typename Unit>
TokenStreamCharsBase<Unit>::TokenStreamCharsBase(JSContext* cx,
const Unit* units,
size_t length,
size_t startOffset)
: TokenStreamCharsShared(cx), sourceUnits(units, length, startOffset) {}
// Appends the UTF-16 units in [cur, end) to the (initially empty)
// |charBuffer|, turning each "\r" and each "\r\n" pair into a single '\n'.
// Returns false if an append fails.
template <>
MOZ_MUST_USE bool TokenStreamCharsBase<char16_t>::
    fillCharBufferFromSourceNormalizingAsciiLineBreaks(const char16_t* cur,
                                                       const char16_t* end) {
  MOZ_ASSERT(this->charBuffer.length() == 0);

  while (cur < end) {
    const char16_t unit = *cur++;

    char16_t normalized = unit;
    if (unit == '\r') {
      normalized = '\n';

      // Swallow the '\n' of a "\r\n" pair so only one newline is emitted.
      if (cur < end && *cur == '\n') {
        cur++;
      }
    }

    if (!this->charBuffer.append(normalized)) {
      return false;
    }
  }

  MOZ_ASSERT(cur == end);
  return true;
}
// Appends the UTF-8 text in [cur, end) to the (initially empty) |charBuffer|.
// ASCII units are appended directly, with "\r" and "\r\n" turned into a
// single '\n'; non-ASCII code points are decoded and appended through
// appendCodePointToCharBuffer.  Returns false if an append fails.
template <>
MOZ_MUST_USE bool TokenStreamCharsBase<Utf8Unit>::
    fillCharBufferFromSourceNormalizingAsciiLineBreaks(const Utf8Unit* cur,
                                                       const Utf8Unit* end) {
  MOZ_ASSERT(this->charBuffer.length() == 0);

  while (cur < end) {
    const Utf8Unit unit = *cur++;

    if (MOZ_UNLIKELY(!IsAscii(unit))) {
      // Multi-unit code point: decoding advances |cur| past its trailing
      // units.
      Maybe<char32_t> decoded = DecodeOneUtf8CodePoint(unit, &cur, end);
      MOZ_ASSERT(decoded.isSome(),
                 "provided source text should already have been validated");

      if (!appendCodePointToCharBuffer(decoded.value())) {
        return false;
      }
      continue;
    }

    char16_t ch = unit.toUint8();
    if (ch == '\r') {
      ch = '\n';

      // Swallow the '\n' of a "\r\n" pair so only one newline is emitted.
      if (cur < end && *cur == Utf8Unit('\n')) {
        cur++;
      }
    }

    if (!this->charBuffer.append(ch)) {
      return false;
    }
  }

  MOZ_ASSERT(cur == end);
  return true;
}
// Forwards |cx| and the source text |units[0..length)| to the
// TokenStreamChars base, using |options.scriptSourceOffset| as the start
// offset.  No other work is done.
template <typename Unit, class AnyCharsAccess>
TokenStreamSpecific<Unit, AnyCharsAccess>::TokenStreamSpecific(
JSContext* cx, const ReadOnlyCompileOptions& options, const Unit* units,
size_t length)
: TokenStreamChars<Unit, AnyCharsAccess>(cx, units, length,
options.scriptSourceOffset) {}
// Validates the compile options.  The starting column must be below
// MaxValue<int32_t> / 2 + 1; otherwise JSMSG_BAD_COLUMN_NUMBER is reported
// and false is returned.
bool TokenStreamAnyChars::checkOptions() {
  // First column value that is rejected.
  const auto columnLimit = mozilla::MaxValue<int32_t>::value / 2 + 1;

  if (options().column < columnLimit) {
    return true;
  }

  reportErrorNoOffset(JSMSG_BAD_COLUMN_NUMBER);
  return false;
}
// Variadic convenience wrapper: packages the arguments following
// |errorNumber| into a va_list and forwards to reportErrorNoOffsetVA.
void TokenStreamAnyChars::reportErrorNoOffset(unsigned errorNumber, ...) {
va_list args;
va_start(args, errorNumber);
reportErrorNoOffsetVA(errorNumber, &args);
va_end(args);
}
// Reports compile error |errorNumber| (formatted with |*args|) using
// metadata that carries no source offset: it is filled in by
// computeErrorMetadataNoOffset and passed with no notes and JSREPORT_ERROR.
void TokenStreamAnyChars::reportErrorNoOffsetVA(unsigned errorNumber,
va_list* args) {
ErrorMetadata metadata;
computeErrorMetadataNoOffset(&metadata);
ReportCompileError(cx, std::move(metadata), nullptr, JSREPORT_ERROR,
errorNumber, args);
}
// |fast_getc| selects a character-reading function based on what the build
// configuration says is available: getc_unlocked, else _getc_nolock, else
// plain getc.
#if defined(HAVE_GETC_UNLOCKED)
# define fast_getc getc_unlocked
#elif defined(HAVE__GETC_NOLOCK)
# define fast_getc _getc_nolock
#else
# define fast_getc getc
#endif
// Advances the line bookkeeping past an end-of-line: the old line base is
// saved in |prevLinebase|, the new line starts at |lineStartOffset|, the line
// number is bumped, and the new line start is recorded in |srcCoords|.
// Returns the result of that recording.
MOZ_MUST_USE MOZ_ALWAYS_INLINE bool
TokenStreamAnyChars::internalUpdateLineInfoForEOL(uint32_t lineStartOffset) {
  prevLinebase = linebase;
  linebase = lineStartOffset;

  ++lineno;
  const bool recorded = srcCoords.add(lineno, linebase);
  return recorded;
}
// Reverts the line base and line number changes made by
// internalUpdateLineInfoForEOL.  Only one level of undo is available:
// |prevLinebase| must hold a saved value, and it is reset to size_t(-1)
// afterwards.
void TokenStreamAnyChars::undoInternalUpdateLineInfoForEOL() {
  const size_t noSavedLinebase = size_t(-1);
  MOZ_ASSERT(prevLinebase != noSavedLinebase);

  linebase = prevLinebase;
  prevLinebase = noSavedLinebase;
  --lineno;
}
#ifdef DEBUG
// Debug check that the units at |ptr| are the UTF-16 encoding of the code
// point described by |peeked|, and that its recorded length matches.
template <>
inline void SourceUnits<char16_t>::assertNextCodePoint(
    const PeekedCodePoint<char16_t>& peeked) {
  const char32_t c = peeked.codePoint();

  // Below NonBMPMin: a single unit equal to the code point.
  if (c < unicode::NonBMPMin) {
    MOZ_ASSERT(peeked.lengthInUnits() == 1);
    MOZ_ASSERT(ptr[0] == c);
    return;
  }

  // Otherwise: a lead/trail surrogate pair.
  MOZ_ASSERT(peeked.lengthInUnits() == 2);

  char16_t lead, trail;
  unicode::UTF16Encode(c, &lead, &trail);
  MOZ_ASSERT(ptr[0] == lead);
  MOZ_ASSERT(ptr[1] == trail);
}
// Debug check that the units at |ptr| are the UTF-8 encoding of the code
// point described by |peeked|.  The expected encoding is computed by hand and
// compared unit by unit over |peeked.lengthInUnits()| units.
template <>
inline void SourceUnits<Utf8Unit>::assertNextCodePoint(
    const PeekedCodePoint<Utf8Unit>& peeked) {
  const char32_t c = peeked.codePoint();

  // A continuation unit carrying the low six bits of |bits|.
  auto trailing = [](char32_t bits) {
    return static_cast<uint8_t>(0b1000'0000 | (bits & 0b11'1111));
  };

  uint8_t expectedUnits[4] = {};
  if (c < 0x80) {
    expectedUnits[0] = AssertedCast<uint8_t>(c);
  } else if (c < 0x800) {
    expectedUnits[0] = 0b1100'0000 | (c >> 6);
    expectedUnits[1] = trailing(c);
  } else if (c < 0x10000) {
    expectedUnits[0] = 0b1110'0000 | (c >> 12);
    expectedUnits[1] = trailing(c >> 6);
    expectedUnits[2] = trailing(c);
  } else {
    expectedUnits[0] = 0b1111'0000 | (c >> 18);
    expectedUnits[1] = trailing(c >> 12);
    expectedUnits[2] = trailing(c >> 6);
    expectedUnits[3] = trailing(c);
  }

  MOZ_ASSERT(peeked.lengthInUnits() <= 4);
  for (uint8_t i = 0; i < peeked.lengthInUnits(); i++) {
    MOZ_ASSERT(expectedUnits[i] == ptr[i].toUint8());
  }
}
#endif
// Returns the column distance covered by the source text in [begin, end).
// The unit of measure is chosen at build time: when
// JS_COLUMN_DIMENSION_IS_CODE_POINTS is nonzero the result comes from
// unicode::CountCodePoints, otherwise from PointerRangeSize over the same
// range.
template <typename Unit>
static size_t ComputeColumn(const Unit* begin, const Unit* end) {
#if JS_COLUMN_DIMENSION_IS_CODE_POINTS
return unicode::CountCodePoints(begin, end);
#else
return PointerRangeSize(begin, end);
#endif
}
// Computes the column of |offset| on the line identified by |lineToken|:
// the column distance from the line's start to |offset|, plus the initial
// column from the compile options when the line is the first one.
template <typename Unit, class AnyCharsAccess>
uint32_t GeneralTokenStreamChars<Unit, AnyCharsAccess>::computeColumn(
    LineToken lineToken, uint32_t offset) const {
  lineToken.assertConsistentOffset(offset);

  const TokenStreamAnyChars& anyChars = anyCharsAccess();

  const Unit* const lineBegin =
      this->sourceUnits.codeUnitPtrAt(anyChars.lineStart(lineToken));
  const Unit* const target = this->sourceUnits.codeUnitPtrAt(offset);
  const auto columnsIntoLine =
      AssertedCast<uint32_t>(ComputeColumn(lineBegin, target));

  // Only the first line is shifted by the options' starting column.
  const auto firstLineShift =
      lineToken.isFirstLine() ? anyChars.options_.column : 0;
  return firstLineShift + columnsIntoLine;
}
// Writes the line number and column corresponding to |offset| into |*line|
// and |*column|.
template <typename Unit, class AnyCharsAccess>
void GeneralTokenStreamChars<Unit, AnyCharsAccess>::computeLineAndColumn(
    uint32_t offset, uint32_t* line, uint32_t* column) const {
  const TokenStreamAnyChars& anyChars = anyCharsAccess();

  // Resolve the containing line once and derive both results from it.
  const auto containingLine = anyChars.lineToken(offset);

  *line = anyChars.lineNumber(containingLine);
  *column = computeColumn(containingLine, offset);
}
// Reports a UTF-8 encoding error at the current source offset.
//
// |relevantUnits| is the number of code units, starting at the current
// position, to quote in an attached note; it must be positive and those
// units are consumed from |sourceUnits| here.  |errorNumber| and the
// variadic arguments describe the main error message.
//
// The body is wrapped in do/while(false) so every early exit reaches the
// single va_end at the bottom.
template <class AnyCharsAccess>
MOZ_COLD void TokenStreamChars<Utf8Unit, AnyCharsAccess>::internalEncodingError(
uint8_t relevantUnits, unsigned errorNumber, ...) {
va_list args;
va_start(args, errorNumber);
do {
size_t offset = this->sourceUnits.offset();
ErrorMetadata err;
TokenStreamAnyChars& anyChars = anyCharsAccess();
bool canAddLineOfContext = fillExceptingContext(&err, offset);
if (canAddLineOfContext) {
if (!internalComputeLineOfContext(&err, offset)) {
break;
}
// When a line of context exists, it is expected to end exactly at the
// error position.
MOZ_ASSERT_IF(err.lineOfContext != nullptr,
err.lineLength == err.tokenOffset);
}
auto notes = MakeUnique<JSErrorNotes>();
if (!notes) {
ReportOutOfMemory(anyChars.cx);
break;
}
// Room for up to four "0xHH" groups separated by spaces, plus the
// terminator counted by sizeof.
constexpr size_t MaxWidth = sizeof("0xHH 0xHH 0xHH 0xHH");
MOZ_ASSERT(relevantUnits > 0);
char badUnitsStr[MaxWidth];
char* ptr = badUnitsStr;
// Each iteration consumes one unit and advances five characters: whatever
// byteToString writes (presumably the four characters "0xHH") followed by
// a space.
while (relevantUnits > 0) {
byteToString(this->sourceUnits.getCodeUnit().toUint8(), ptr);
ptr[4] = ' ';
ptr += 5;
relevantUnits--;
}
// Turn the final separator space into the string terminator.
ptr[-1] = '\0';
uint32_t line, column;
computeLineAndColumn(offset, &line, &column);
if (!notes->addNoteASCII(anyChars.cx, anyChars.getFilename(), 0, line,
column, GetErrorMessage, nullptr,
JSMSG_BAD_CODE_UNITS, badUnitsStr)) {
break;
}
ReportCompileError(anyChars.cx, std::move(err), std::move(notes),
JSREPORT_ERROR, errorNumber, &args);
} while (false);
va_end(args);
}
// Reports JSMSG_BAD_LEADING_UTF8_UNIT for |lead|, quoting one code unit.
template <class AnyCharsAccess>
MOZ_COLD void TokenStreamChars<Utf8Unit, AnyCharsAccess>::badLeadUnit(
    Utf8Unit lead) {
  // Text form of the offending byte, filled in by byteToTerminatedString.
  char leadByteStr[5];
  byteToTerminatedString(lead.toUint8(), leadByteStr);

  internalEncodingError(1, JSMSG_BAD_LEADING_UTF8_UNIT, leadByteStr);
}
// Reports JSMSG_NOT_ENOUGH_CODE_UNITS: the lead unit |lead| starts a code
// point needing |required| units (2, 3 or 4) but only |remaining| units are
// available.  The message arguments describe |required - 1| expected and
// |remaining - 1| observed units, with wording chosen for singular/plural.
template <class AnyCharsAccess>
MOZ_COLD void TokenStreamChars<Utf8Unit, AnyCharsAccess>::notEnoughUnits(
    Utf8Unit lead, uint8_t remaining, uint8_t required) {
  MOZ_ASSERT(required == 2 || required == 3 || required == 4);
  MOZ_ASSERT(remaining < 4);
  MOZ_ASSERT(remaining < required);

  char leadByteStr[5];
  byteToTerminatedString(lead.toUint8(), leadByteStr);

  // Single-character strings for the two counts.  Both counts are below 4
  // here, so toHexChar presumably yields the plain decimal digit.
  const char expectedStr[] = {toHexChar(required - 1), '\0'};
  const char actualStr[] = {toHexChar(remaining - 1), '\0'};

  const char* const expectedSuffix = (required == 2) ? "" : "s";
  const char* const actualSuffix = (remaining == 2) ? " was" : "s were";

  internalEncodingError(remaining, JSMSG_NOT_ENOUGH_CODE_UNITS, leadByteStr,
                        expectedStr, expectedSuffix, actualStr, actualSuffix);
}
// Reports JSMSG_BAD_TRAILING_UTF8_UNIT.  The offending unit is the last of
// the |unitsObserved| units starting at the next code unit; all
// |unitsObserved| units are quoted in the report.
template <class AnyCharsAccess>
MOZ_COLD void TokenStreamChars<Utf8Unit, AnyCharsAccess>::badTrailingUnit(
    uint8_t unitsObserved) {
  const Utf8Unit* const next = this->sourceUnits.addressOfNextCodeUnit();
  const Utf8Unit badUnit = next[unitsObserved - 1];

  char badByteStr[5];
  byteToTerminatedString(badUnit.toUint8(), badByteStr);

  internalEncodingError(unitsObserved, JSMSG_BAD_TRAILING_UTF8_UNIT,
                        badByteStr);
}
// Reports JSMSG_FORBIDDEN_UTF8_CODE_POINT for |codePoint|, which occupied
// |codePointLength| units, passing |reason| through to the message.
//
// The code point is rendered as "0x" followed by its hex digits.  The string
// is built back to front in a fixed-size buffer: terminator first, then
// digits from least significant upward, then the prefix.
template <class AnyCharsAccess>
MOZ_COLD void
TokenStreamChars<Utf8Unit, AnyCharsAccess>::badStructurallyValidCodePoint(
    uint32_t codePoint, uint8_t codePointLength, const char* reason) {
  // Large enough for "0x1FFFFF" including its terminator.
  constexpr size_t MaxHexSize = sizeof("0x1FFFFF");
  char codePointCharsArray[MaxHexSize];

  char* cursor = codePointCharsArray + ArrayLength(codePointCharsArray);
  *--cursor = '\0';

  // do/while so that a value of zero still produces the digit '0'.
  uint32_t remaining = codePoint;
  do {
    MOZ_ASSERT(codePointCharsArray < cursor);
    *--cursor = toHexChar(remaining & 0xF);
    remaining >>= 4;
  } while (remaining != 0);

  MOZ_ASSERT(codePointCharsArray + 2 <= cursor);
  *--cursor = 'x';
  *--cursor = '0';

  internalEncodingError(codePointLength, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
                        cursor, reason);
}
// Decodes the non-ASCII code point whose lead unit is |lead|, reading any
// further units from |sourceUnits|.  On success stores the code point in
// |*codePoint| and returns true.  On a decoding failure the matching error
// reporter is invoked by the decoder and false is returned.  No line
// terminator normalization is applied to the result.
template <class AnyCharsAccess>
MOZ_MUST_USE bool
TokenStreamChars<Utf8Unit, AnyCharsAccess>::getNonAsciiCodePointDontNormalize(
    Utf8Unit lead, char32_t* codePoint) {
  // One callback per way decoding can fail, each forwarding to the
  // corresponding reporter on this object.
  auto reportBadLead = [this, &lead]() { this->badLeadUnit(lead); };
  auto reportTooFewUnits = [this, &lead](uint8_t remaining,
                                         uint8_t required) {
    this->notEnoughUnits(lead, remaining, required);
  };
  auto reportBadTrailing = [this](uint8_t unitsObserved) {
    this->badTrailingUnit(unitsObserved);
  };
  auto reportBadCodePoint = [this](char32_t badCodePoint,
                                   uint8_t unitsObserved) {
    this->badCodePoint(badCodePoint, unitsObserved);
  };
  auto reportNotShortest = [this](char32_t badCodePoint,
                                  uint8_t unitsObserved) {
    this->notShortestForm(badCodePoint, unitsObserved);
  };

  SourceUnitsIterator iter(this->sourceUnits);
  Maybe<char32_t> decoded = DecodeOneUtf8CodePointInline(
      lead, &iter, SourceUnitsEnd(), reportBadLead, reportTooFewUnits,
      reportBadTrailing, reportBadCodePoint, reportNotShortest);

  if (decoded.isSome()) {
    *codePoint = decoded.value();
    return true;
  }
  return false;
}
// Completes reading a non-ASCII code point whose first UTF-16 unit |lead| has
// just been consumed from |sourceUnits|.
//
// On success returns true with |*codePoint| set to:
//   - '\n' when |lead| is U+2028 LINE SEPARATOR or U+2029 PARAGRAPH
//     SEPARATOR (line info is updated for the new line);
//   - the decoded code point when |lead| is a lead surrogate followed by a
//     trail surrogate (the trail unit is consumed);
//   - |lead| itself otherwise, which includes unpaired surrogates.
//
// Returns false only when updating line info fails; |*codePoint| is then
// marked undefined and must not be used.
template <class AnyCharsAccess>
bool TokenStreamChars<char16_t, AnyCharsAccess>::getNonAsciiCodePoint(
    int32_t lead, int32_t* codePoint) {
  MOZ_ASSERT(lead != EOF);
  MOZ_ASSERT(!isAsciiCodePoint(lead),
             "ASCII code unit/point must be handled separately");
  MOZ_ASSERT(lead == this->sourceUnits.previousCodeUnit(),
             "getNonAsciiCodePoint called incorrectly");

  // The result is usually |lead|; it is overwritten below when needed.
  *codePoint = lead;

  // Single-unit code points and lone trail surrogates.
  if (MOZ_LIKELY(!unicode::IsLeadSurrogate(lead))) {
    if (MOZ_UNLIKELY(lead == unicode::LINE_SEPARATOR ||
                     lead == unicode::PARA_SEPARATOR)) {
      if (!updateLineInfoForEOL()) {
#ifdef DEBUG
        // Poison the out-param so use after failure is easier to notice.
        *codePoint = EOF;
#endif
        MOZ_MAKE_MEM_UNDEFINED(codePoint, sizeof(*codePoint));
        return false;
      }

      *codePoint = '\n';
    } else {
      MOZ_ASSERT(!IsLineTerminator(AssertedCast<char32_t>(*codePoint)));
    }

    return true;
  }

  // A lead surrogate with no trail surrogate after it stands for itself.
  if (MOZ_UNLIKELY(
          this->sourceUnits.atEnd() ||
          !unicode::IsTrailSurrogate(this->sourceUnits.peekCodeUnit()))) {
    MOZ_ASSERT(!IsLineTerminator(AssertedCast<char32_t>(*codePoint)));
    return true;
  }

  // A proper surrogate pair: consume the trail unit and combine.
  *codePoint = unicode::UTF16Decode(lead, this->sourceUnits.getCodeUnit());
  MOZ_ASSERT(!IsLineTerminator(AssertedCast<char32_t>(*codePoint)));
  return true;
}
// Reads the next code point into |*cp|.  At end of input |*cp| is EOF and
// true is returned.  Otherwise the work is delegated to the ASCII or
// non-ASCII helper, whose result is returned.
template <typename Unit, class AnyCharsAccess>
bool TokenStreamSpecific<Unit, AnyCharsAccess>::getCodePoint(int32_t* cp) {
  const int32_t unit = getCodeUnit();

  if (unit != EOF) {
    return isAsciiCodePoint(unit) ? getFullAsciiCodePoint(unit, cp)
                                  : getNonAsciiCodePoint(unit, cp);
  }

  MOZ_ASSERT(anyCharsAccess().flags.isEOF,
             "flags.isEOF should have been set by getCodeUnit()");
  *cp = EOF;
  return true;
}
// Completes reading a non-ASCII code point whose UTF-8 lead unit |unit| has
// just been consumed from |sourceUnits|.
//
// On success returns true with |*codePoint| set to '\n' when the decoded
// code point is U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR (line
// info is updated for the new line), or to the decoded code point otherwise.
//
// Returns false when decoding fails (the decoder invokes the matching error
// reporter) or when updating line info fails.  In the latter case
// |*codePoint| is marked undefined and must not be used.
template <class AnyCharsAccess>
bool TokenStreamChars<Utf8Unit, AnyCharsAccess>::getNonAsciiCodePoint(
    int32_t unit, int32_t* codePoint) {
  MOZ_ASSERT(unit != EOF);
  MOZ_ASSERT(!isAsciiCodePoint(unit),
             "ASCII code unit/point must be handled separately");

  Utf8Unit lead = Utf8Unit(static_cast<unsigned char>(unit));
  MOZ_ASSERT(lead == this->sourceUnits.previousCodeUnit(),
             "getNonAsciiCodePoint called incorrectly");

  // One callback per way decoding can fail, each forwarding to the
  // corresponding reporter on this object.
  auto onBadLeadUnit = [this, &lead]() { this->badLeadUnit(lead); };

  auto onNotEnoughUnits = [this, &lead](uint_fast8_t remaining,
                                        uint_fast8_t required) {
    this->notEnoughUnits(lead, remaining, required);
  };

  auto onBadTrailingUnit = [this](uint_fast8_t unitsObserved) {
    this->badTrailingUnit(unitsObserved);
  };

  auto onBadCodePoint = [this](char32_t badCodePoint,
                               uint_fast8_t unitsObserved) {
    this->badCodePoint(badCodePoint, unitsObserved);
  };

  auto onNotShortestForm = [this](char32_t badCodePoint,
                                  uint_fast8_t unitsObserved) {
    this->notShortestForm(badCodePoint, unitsObserved);
  };

  SourceUnitsIterator iter(this->sourceUnits);
  Maybe<char32_t> maybeCodePoint = DecodeOneUtf8CodePoint(
      lead, &iter, SourceUnitsEnd(), onBadLeadUnit, onNotEnoughUnits,
      onBadTrailingUnit, onBadCodePoint, onNotShortestForm);
  if (maybeCodePoint.isNothing()) {
    return false;
  }

  char32_t cp = maybeCodePoint.value();
  if (MOZ_UNLIKELY(cp == unicode::LINE_SEPARATOR ||
                   cp == unicode::PARA_SEPARATOR)) {
    if (!updateLineInfoForEOL()) {
#ifdef DEBUG
      // Poison the out-param so use after failure is easier to notice.
      *codePoint = EOF;
#endif
      MOZ_MAKE_MEM_UNDEFINED(codePoint, sizeof(*codePoint));
      return false;
    }

    *codePoint = '\n';
  } else {
    MOZ_ASSERT(!IsLineTerminator(cp));
    *codePoint = AssertedCast<int32_t>(cp);
  }

  return true;
}
// Returns the offset at which a context window ending at |offset| should
// start.  Scans backward from |offset| and stops at whichever comes first:
// the start of the source (|startOffset_|), WindowRadius units of distance,
// a line terminator, or a surrogate that is not part of a valid pair.
template <>
size_t SourceUnits<char16_t>::findWindowStart(size_t offset) const {
const char16_t* const earliestPossibleStart = codeUnitPtrAt(startOffset_);
const char16_t* const initial = codeUnitPtrAt(offset);
const char16_t* p = initial;
// Number of units currently between |p| and the starting position.
auto HalfWindowSize = [&p, &initial]() {
return PointerRangeSize(p, initial);
};
while (true) {
MOZ_ASSERT(earliestPossibleStart <= p);
MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
if (p <= earliestPossibleStart || HalfWindowSize() >= WindowRadius) {
break;
}
// Inspect the unit just before |p| without including it yet.
char16_t c = p[-1];
if (IsLineTerminator(c)) {
break;
}
// A lead surrogate seen here has no trail unit inside the window after it.
if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) {
break;
}
// Tentatively include the unit; this is undone below if it turns out to be
// an unusable trail surrogate.
p--;
if (MOZ_LIKELY(!unicode::IsTrailSurrogate(c))) {
continue;
}
// |c| is a trail surrogate: it is kept only if its lead surrogate is
// present and also fits inside the window.
if (HalfWindowSize() >= WindowRadius ||
p <= earliestPossibleStart || !unicode::IsLeadSurrogate(p[-1])) {
p++;
break;
}
p--;
}
MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
return offset - HalfWindowSize();
}
// Returns the offset at which a context window ending at |offset| should
// start.  Scans backward one code point at a time and stops at whichever
// comes first: the start of the source (|startOffset_|), WindowRadius units
// of distance, or a line terminator.
template <>
size_t SourceUnits<Utf8Unit>::findWindowStart(size_t offset) const {
const Utf8Unit* const earliestPossibleStart = codeUnitPtrAt(startOffset_);
const Utf8Unit* const initial = codeUnitPtrAt(offset);
const Utf8Unit* p = initial;
// Number of units currently between |p| and the starting position.
auto HalfWindowSize = [&p, &initial]() {
return PointerRangeSize(p, initial);
};
while (true) {
MOZ_ASSERT(earliestPossibleStart <= p);
MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
if (p <= earliestPossibleStart || HalfWindowSize() >= WindowRadius) {
break;
}
// Inspect the unit just before |p| without including it yet.
uint8_t prev = p[-1].toUint8();
if (prev == '\r' || prev == '\n') {
break;
}
// The byte sequences E2 80 A8 and E2 80 A9 (U+2028 and U+2029) also end
// the scan.
// NOTE(review): p[-2] and p[-3] are read without an explicit bounds check;
// presumably valid UTF-8 input guarantees an earlier comparison fails
// first -- confirm.
if (MOZ_UNLIKELY((prev == 0xA8 || prev == 0xA9) &&
p[-2].toUint8() == 0x80 && p[-3].toUint8() == 0xE2)) {
break;
}
// Step back to the first unit of the preceding code point.
while (IsTrailingUnit(*--p)) {
continue;
}
MOZ_ASSERT(earliestPossibleStart <= p);
// If that code point does not fit within the radius, step forward past it
// again and stop.
if (HalfWindowSize() > WindowRadius) {
static_assert(WindowRadius > 3,
"skipping over non-lead code units below must not "
"advance past |offset|");
while (IsTrailingUnit(*++p)) {
continue;
}
MOZ_ASSERT(HalfWindowSize() < WindowRadius);
break;
}
}
MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
return offset - HalfWindowSize();
}
// Returns the offset at which a context window starting at |offset| should
// end.  Scans forward from |offset| and stops at whichever comes first: the
// end of the source (|limit_|), WindowRadius units of distance, a line
// terminator, or a surrogate that is not part of a valid pair.
template <>
size_t SourceUnits<char16_t>::findWindowEnd(size_t offset) const {
const char16_t* const initial = codeUnitPtrAt(offset);
const char16_t* p = initial;
// Number of units currently between the starting position and |p|.
auto HalfWindowSize = [&initial, &p]() {
return PointerRangeSize(initial, p);
};
while (true) {
MOZ_ASSERT(p <= limit_);
MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
if (p >= limit_ || HalfWindowSize() >= WindowRadius) {
break;
}
char16_t c = *p;
if (IsLineTerminator(c)) {
break;
}
// A trail surrogate seen here has no lead unit inside the window before
// it.
if (MOZ_UNLIKELY(unicode::IsTrailSurrogate(c))) {
break;
}
// Tentatively include the unit; this is undone below if it turns out to be
// an unusable lead surrogate.
p++;
if (MOZ_LIKELY(!unicode::IsLeadSurrogate(c))) {
continue;
}
// |c| is a lead surrogate: it is kept only if its trail surrogate is
// present and also fits inside the window.
if (HalfWindowSize() >= WindowRadius || p >= limit_ || !unicode::IsTrailSurrogate(*p)) {
p--;
break;
}
p++;
}
return offset + HalfWindowSize();
}
// Returns the offset at which a context window starting at |offset| should
// end.  Scans forward one code point at a time and stops at whichever comes
// first: the end of the source (|limit_|), WindowRadius units of distance, a
// line terminator, or text that cannot be decoded as a code point.
template <>
size_t SourceUnits<Utf8Unit>::findWindowEnd(size_t offset) const {
const Utf8Unit* const initial = codeUnitPtrAt(offset);
const Utf8Unit* p = initial;
// Number of units currently between the starting position and |p|.
auto HalfWindowSize = [&initial, &p]() {
return PointerRangeSize(initial, p);
};
while (true) {
MOZ_ASSERT(p <= limit_);
MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
if (p >= limit_ || HalfWindowSize() >= WindowRadius) {
break;
}
Utf8Unit lead = *p;
// ASCII: a single unit, which ends the window only if it is a line
// terminator.
if (mozilla::IsAscii(lead)) {
if (IsSingleUnitLineTerminator(lead)) {
break;
}
p++;
continue;
}
// Non-ASCII: look at the whole code point before deciding to include it.
PeekedCodePoint<Utf8Unit> peeked = PeekCodePoint(p, limit_);
if (peeked.isNone()) {
break; }
char32_t c = peeked.codePoint();
if (MOZ_UNLIKELY(c == unicode::LINE_SEPARATOR ||
c == unicode::PARA_SEPARATOR)) {
break;
}
MOZ_ASSERT(!IsLineTerminator(c));
// Never include only part of a code point: stop if it would overrun the
// radius.
uint8_t len = peeked.lengthInUnits();
if (HalfWindowSize() + len > WindowRadius) {
break;
}
p += len;
}
MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
return offset + HalfWindowSize();
}
// Consumes code points until the read position reaches |position|, then
// resets the current token to an empty range at the resulting offset and
// drops any lookahead.  Returns false if reading a code point fails.
template <typename Unit, class AnyCharsAccess>
bool TokenStreamSpecific<Unit, AnyCharsAccess>::advance(size_t position) {
  const Unit* const target = this->sourceUnits.codeUnitPtrAt(position);

  // Go through getCodePoint rather than jumping, so each code point is
  // processed on the way.
  while (this->sourceUnits.addressOfNextCodeUnit() < target) {
    int32_t ignored;
    if (!getCodePoint(&ignored)) {
      return false;
    }
  }

  TokenStreamAnyChars& anyChars = anyCharsAccess();

  Token* current = const_cast<Token*>(&anyChars.currentToken());
  current->pos.begin = this->sourceUnits.offset();
  current->pos.end = current->pos.begin;
  MOZ_MAKE_MEM_UNDEFINED(&current->type, sizeof(current->type));

  anyChars.lookahead = 0;
  return true;
}
// Restores the tokenizer state saved in |pos|: the read position, flags,
// line bookkeeping, the current token, and the saved lookahead tokens.
template <typename Unit, class AnyCharsAccess>
void TokenStreamSpecific<Unit, AnyCharsAccess>::seek(const Position& pos) {
TokenStreamAnyChars& anyChars = anyCharsAccess();
this->sourceUnits.setAddressOfNextCodeUnit(pos.buf,
true);
anyChars.flags = pos.flags;
anyChars.lineno = pos.lineno;
anyChars.linebase = pos.linebase;
anyChars.prevLinebase = pos.prevLinebase;
anyChars.lookahead = pos.lookahead;
anyChars.tokens[anyChars.cursor()] = pos.currentToken;
// Copy back as many lookahead tokens as were just restored into
// |anyChars.lookahead|, into the slots following the cursor.
for (unsigned i = 0; i < anyChars.lookahead; i++) {
anyChars.tokens[anyChars.aheadCursor(1 + i)] = pos.lookaheadTokens[i];
}
}
// Seeks to |pos| after first bringing this stream's source coordinates up to
// date with those of |other|.  Returns false, without seeking, if that update
// fails.
template <typename Unit, class AnyCharsAccess>
bool TokenStreamSpecific<Unit, AnyCharsAccess>::seek(
    const Position& pos, const TokenStreamAnyChars& other) {
  const bool coordsFilled = anyCharsAccess().srcCoords.fill(other.srcCoords);
  if (coordsFilled) {
    seek(pos);
  }
  return coordsFilled;
}
// Fills |*err| for an error that has no associated source offset: the muted
// flag and filename come from this stream, and line and column are zero.
// |err| must not already carry a line of context.
void TokenStreamAnyChars::computeErrorMetadataNoOffset(ErrorMetadata* err) {
  MOZ_ASSERT(err->lineOfContext == nullptr);

  err->isMuted = mutedErrors;
  err->filename = filename_;

  // No position information is available.
  err->lineNumber = 0;
  err->columnNumber = 0;
}
// Fills the muted flag and filename of |*err|.
//
// The return value is not a success indicator.  It is false when location
// information (filename, line, column) was taken from a calling frame
// instead of this stream, and true when this stream's own filename was used.
// |offset| is not consulted here.
bool TokenStreamAnyChars::fillExceptingContext(ErrorMetadata* err,
                                               uint32_t offset) {
  err->isMuted = mutedErrors;

  // With no filename of our own, and when not on a helper thread, try to
  // borrow the location from the nearest non-builtin frame.
  const bool mayUseCallerLocation = !filename_ && !cx->helperThread();
  if (mayUseCallerLocation) {
    NonBuiltinFrameIter iter(cx, FrameIter::FOLLOW_DEBUGGER_EVAL_PREV_LINK,
                             cx->realm()->principals());
    const bool haveCallerFilename = !iter.done() && iter.filename();
    if (haveCallerFilename) {
      err->filename = iter.filename();
      err->lineNumber = iter.computeLine(&err->columnNumber);
      return false;
    }
  }

  err->filename = filename_;
  return true;
}
// Returns true exactly when the current token's type is TokenKind::Eof while
// the stream itself is not flagged as being at EOF.
// NOTE(review): given the function's name, presumably this combination
// identifies a stream on which no token has been produced yet -- confirm the
// intended polarity with callers.
template <typename Unit, class AnyCharsAccess>
bool TokenStreamSpecific<Unit, AnyCharsAccess>::hasTokenizationStarted() const {
  const TokenStreamAnyChars& anyChars = anyCharsAccess();

  if (!anyChars.isCurrentTokenType(TokenKind::Eof)) {
    return false;
  }
  return !anyChars.isEOF();
}
// UTF-16 specialization.  It only asserts that it is never reached and
// writes nothing to the out-parameters.
template <>
inline void SourceUnits<char16_t>::computeWindowOffsetAndLength(
const char16_t* encodedWindow, size_t encodedTokenOffset,
size_t* utf16TokenOffset, size_t encodedWindowLength,
size_t* utf16WindowLength) {
MOZ_ASSERT_UNREACHABLE("shouldn't need to recompute for UTF-16");
}
// Converts positions within a UTF-8 window into UTF-16 code unit counts.
//
// |encodedWindow| points at |encodedWindowLength| UTF-8 units, and
// |encodedTokenOffset| is a position within them.  On return,
// |*utf16TokenOffset| is the number of UTF-16 units needed for the text
// before the token and |*utf16WindowLength| the number needed for the whole
// window.
template <>
inline void SourceUnits<Utf8Unit>::computeWindowOffsetAndLength(
    const Utf8Unit* encodedWindow, size_t encodedTokenOffset,
    size_t* utf16TokenOffset, size_t encodedWindowLength,
    size_t* utf16WindowLength) {
  MOZ_ASSERT(encodedTokenOffset <= encodedWindowLength,
             "token offset must be within the window, and the two lambda "
             "calls below presume this ordering of values");

  const Utf8Unit* const windowLimit = encodedWindow + encodedWindowLength;
  const Utf8Unit* const tokenStart = encodedWindow + encodedTokenOffset;
  MOZ_ASSERT(tokenStart <= windowLimit);

  // Running total of UTF-16 units.  Both the total and |encodedWindow| are
  // shared across calls, so the second call resumes where the first stopped.
  size_t utf16Units = 0;
  auto countUpTo = [&utf16Units, &encodedWindow](const Utf8Unit* limit) {
    while (encodedWindow < limit) {
      const Utf8Unit lead = *encodedWindow++;

      if (MOZ_UNLIKELY(!IsAscii(lead))) {
        Maybe<char32_t> cp =
            DecodeOneUtf8CodePoint(lead, &encodedWindow, limit);
        MOZ_ASSERT(cp.isSome(),
                   "computed window should only contain valid UTF-8");

        // Supplementary code points take two UTF-16 units, others one.
        utf16Units += unicode::IsSupplementary(cp.value()) ? 2 : 1;
        continue;
      }

      // ASCII maps to exactly one UTF-16 unit.
      utf16Units++;
    }

    return utf16Units;
  };

  *utf16TokenOffset = countUpTo(tokenStart);
  *utf16WindowLength = countUpTo(windowLimit);
}
// Attaches a window of source text around |offset| to |*err| as UTF-16:
// |lineOfContext| receives the text, |lineLength| its length in UTF-16 units,
// and |tokenOffset| the position of |offset| within it.
//
// Returns false if building the text fails.  Returns true otherwise,
// including when the window is empty, in which case |*err| is left
// untouched.
template <typename Unit>
bool TokenStreamCharsBase<Unit>::addLineOfContext(ErrorMetadata* err,
uint32_t offset) {
size_t encodedOffset = offset;
// Window bounds, as offsets in source code units.
size_t encodedWindowStart = sourceUnits.findWindowStart(encodedOffset);
size_t encodedWindowEnd = sourceUnits.findWindowEnd(encodedOffset);
size_t encodedWindowLength = encodedWindowEnd - encodedWindowStart;
MOZ_ASSERT(encodedWindowLength <= SourceUnits::WindowRadius * 2);
if (encodedWindowLength == 0) {
MOZ_ASSERT(err->lineOfContext == nullptr,
"ErrorMetadata::lineOfContext must be null so we don't "
"have to set the lineLength/tokenOffset fields");
return true;
}
// Discard whatever |charBuffer| currently holds: it is reused here to build
// the window text.
this->charBuffer.clear();
const Unit* encodedWindow = sourceUnits.codeUnitPtrAt(encodedWindowStart);
if (!fillCharBufferFromSourceNormalizingAsciiLineBreaks(
encodedWindow, encodedWindow + encodedWindowLength)) {
return false;
}
// Record the length before the terminator is appended.
size_t utf16WindowLength = this->charBuffer.length();
if (!this->charBuffer.append('\0')) {
return false;
}
err->lineOfContext.reset(this->charBuffer.extractOrCopyRawBuffer());
if (!err->lineOfContext) {
return false;
}
size_t encodedTokenOffset = encodedOffset - encodedWindowStart;
MOZ_ASSERT(encodedTokenOffset <= encodedWindowLength,
"token offset must be inside the window");
// For UTF-16 source the encoded and UTF-16 measurements coincide.
if (std::is_same<Unit, char16_t>::value) {
MOZ_ASSERT(utf16WindowLength == encodedWindowLength,
"UTF-16 to UTF-16 shouldn't change window length");
err->tokenOffset = encodedTokenOffset;
err->lineLength = encodedWindowLength;
} else {
MOZ_ASSERT((std::is_same<Unit, Utf8Unit>::value),
"should only see UTF-8 here");
// Equal lengths mean the window was all ASCII, so offsets carry over
// unchanged; otherwise they must be recomputed in UTF-16 units.
bool simple = utf16WindowLength == encodedWindowLength;
MOZ_ASSERT(std::all_of(encodedWindow, encodedWindow + encodedWindowLength,
IsAscii<Unit>) == simple,
"equal window lengths in UTF-8 should correspond only to "
"wholly-ASCII text");
if (simple) {
err->tokenOffset = encodedTokenOffset;
err->lineLength = encodedWindowLength;
} else {
sourceUnits.computeWindowOffsetAndLength(
encodedWindow, encodedTokenOffset, &err->tokenOffset,
encodedWindowLength, &err->lineLength);
}
}
return true;
}
// Populate |err| for an error at |errorOffset|.  A |NoOffset| error is
// delegated to the any-chars object.  Otherwise the offset is either the one
// supplied or the current position in the source units.
template <typename Unit, class AnyCharsAccess>
bool TokenStreamSpecific<Unit, AnyCharsAccess>::computeErrorMetadata(
    ErrorMetadata* err, const ErrorOffset& errorOffset) {
  if (errorOffset.is<NoOffset>()) {
    anyCharsAccess().computeErrorMetadataNoOffset(err);
    return true;
  }

  const uint32_t offset = errorOffset.is<uint32_t>()
                              ? errorOffset.as<uint32_t>()
                              : this->sourceUnits.offset();

  // A false result from |fillExceptingContext| means no line of context is
  // to be computed here; that is reported as success, not as a failure.
  if (!fillExceptingContext(err, offset)) {
    return true;
  }

  return internalComputeLineOfContext(err, offset);
}
// Having just consumed a backslash, try to consume the rest of a Unicode
// escape ("uXXXX" or "u{...}").  On success stores the escaped value in
// |*codePoint| and returns the number of code units consumed after the
// backslash.  On failure consumes nothing and returns 0.
template <typename Unit, class AnyCharsAccess>
uint32_t GeneralTokenStreamChars<Unit, AnyCharsAccess>::matchUnicodeEscape(
    uint32_t* codePoint) {
  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));

  int32_t unit = getCodeUnit();
  if (unit == 'u') {
    unit = getCodeUnit();

    // Four-hex-digit form: |unit| is the first digit, the next three are
    // matched as a group.
    char16_t lowDigits;
    if (JS7_ISHEX(unit) && this->sourceUnits.matchHexDigits(3, &lowDigits)) {
      *codePoint = (JS7_UNHEX(unit) << 12) | lowDigits;
      return 5;
    }

    if (unit == '{') {
      return matchExtendedUnicodeEscape(codePoint);
    }

    // Not an escape after all: put back what followed the 'u', then the 'u'.
    ungetCodeUnit(unit);
    ungetCodeUnit('u');
  } else {
    ungetCodeUnit(unit);
  }

  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
  return 0;
}
// Having just consumed "\u{", try to consume the hex digits and closing '}'
// of an extended Unicode escape.  On success stores the value in
// |*codePoint| and returns the number of code units consumed after the
// backslash.  On failure rewinds to just after the backslash and returns 0.
template <typename Unit, class AnyCharsAccess>
uint32_t
GeneralTokenStreamChars<Unit, AnyCharsAccess>::matchExtendedUnicodeEscape(
    uint32_t* codePoint) {
  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('{'));

  // Leading zeroes do not count toward the six-significant-digit limit.
  uint32_t leadingZeroes = 0;
  int32_t unit = getCodeUnit();
  for (; unit == '0'; unit = getCodeUnit()) {
    leadingZeroes++;
  }

  // Accumulate at most six significant hex digits.
  uint32_t code = 0;
  size_t digits = 0;
  for (; digits < 6 && JS7_ISHEX(unit); digits++) {
    code = (code << 4) | JS7_UNHEX(unit);
    unit = getCodeUnit();
  }

  // Units consumed since the backslash: "u{", the zeroes, the digits, and
  // the final unit read -- unless that final read hit EOF.
  const uint32_t gotten = 2 + leadingZeroes + digits + (unit != EOF);

  const bool sawAnyDigit = leadingZeroes > 0 || digits > 0;
  if (unit == '}' && sawAnyDigit && code <= unicode::NonBMPMax) {
    *codePoint = code;
    return gotten;
  }

  this->sourceUnits.unskipCodeUnits(gotten);
  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
  return 0;
}
// Match a Unicode escape (the backslash already consumed) whose value is a
// valid identifier start.  Returns the escape's length on success.
// Otherwise nothing stays consumed and 0 is returned.
template <typename Unit, class AnyCharsAccess>
uint32_t
GeneralTokenStreamChars<Unit, AnyCharsAccess>::matchUnicodeEscapeIdStart(
    uint32_t* codePoint) {
  const uint32_t length = matchUnicodeEscape(codePoint);
  if (MOZ_UNLIKELY(length == 0)) {
    MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
    return 0;
  }

  if (MOZ_LIKELY(unicode::IsIdentifierStart(*codePoint))) {
    return length;
  }

  // A well-formed escape, but not one that may begin an identifier.
  this->sourceUnits.unskipCodeUnits(length);
  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
  return 0;
}
// Match a Unicode escape (the backslash already consumed) whose value is a
// valid identifier part.  Returns true on success.  Otherwise nothing stays
// consumed and false is returned.
template <typename Unit, class AnyCharsAccess>
bool GeneralTokenStreamChars<Unit, AnyCharsAccess>::matchUnicodeEscapeIdent(
    uint32_t* codePoint) {
  const uint32_t length = matchUnicodeEscape(codePoint);
  if (MOZ_UNLIKELY(length == 0)) {
    MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
    return false;
  }

  if (MOZ_LIKELY(unicode::IsIdentifierPart(*codePoint))) {
    return true;
  }

  // A well-formed escape, but not one that may continue an identifier.
  this->sourceUnits.unskipCodeUnits(length);
  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
  return false;
}
// Consume the first code point of an identifier (used after a '#' has been
// consumed for a private name).
//
// On success returns true with the identifier-start code point (or Unicode
// escape) consumed, and sets |*sawEscape| to say whether it was written as
// an escape.  On failure reports an error, leaves the offending text
// unconsumed, and returns false.  |*sawEscape| is written on every path.
//
// A non-ASCII start is decoded as a *whole code point* before being tested.
// Testing the lone lead code unit would classify a UTF-8 lead byte (or a
// UTF-16 lead surrogate) as if it were a code point of the same numeric
// value: a UTF-8 identifier start would be accepted with only its first
// byte consumed, and a supplementary-plane start would be rejected.
template <typename Unit, class AnyCharsAccess>
MOZ_MUST_USE bool
TokenStreamSpecific<Unit, AnyCharsAccess>::matchIdentifierStart(
    IdentifierEscapes* sawEscape) {
  int32_t unit = getCodeUnit();

  if (unit == '\\') {
    *sawEscape = IdentifierEscapes::SawUnicodeEscape;

    uint32_t codePoint;
    uint32_t escapeLength = matchUnicodeEscapeIdStart(&codePoint);
    if (escapeLength != 0) {
      return true;
    }

    // Point the error at the start of the invalid escape rather than at
    // some unit inside it.
    ungetCodeUnit('\\');
    error(JSMSG_BAD_ESCAPE);
    return false;
  }

  *sawEscape = IdentifierEscapes::None;

  if (unit != EOF) {
    if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
      // ASCII: the code unit *is* the code point.
      if (unicode::IsIdentifierStart(char16_t(unit))) {
        return true;
      }
    } else {
      // Non-ASCII: put the lead unit back and examine the full code point.
      // An encoding error (|isNone()|) is simply "not an identifier start".
      ungetCodeUnit(unit);

      PeekedCodePoint<Unit> peeked = this->sourceUnits.peekCodePoint();
      if (!peeked.isNone() && unicode::IsIdentifierStart(peeked.codePoint())) {
        this->sourceUnits.consumeKnownCodePoint(peeked);
        return true;
      }

      error(JSMSG_MISSING_PRIVATE_NAME);
      return false;
    }
  }

  // NOTE: |unit| may be EOF here.
  ungetCodeUnit(unit);
  error(JSMSG_MISSING_PRIVATE_NAME);
  return false;
}
// Look for the sourceURL and then the sourceMappingURL directive at the
// current position inside a comment.  If either lookup fails, the token is
// marked bad and false is returned.
template <typename Unit, class AnyCharsAccess>
bool TokenStreamSpecific<Unit, AnyCharsAccess>::getDirectives(
    bool isMultiline, bool shouldWarnDeprecated) {
  if (getDisplayURL(isMultiline, shouldWarnDeprecated) &&
      getSourceMappingURL(isMultiline, shouldWarnDeprecated)) {
    return true;
  }

  badToken();
  return false;
}
// Copy the contents of |charBuffer| into a freshly allocated,
// null-terminated array stored in |*destination|.  Returns false if the
// allocation fails.
MOZ_MUST_USE bool TokenStreamCharsShared::copyCharBufferTo(
    JSContext* cx, UniquePtr<char16_t[], JS::FreePolicy>* destination) {
  const size_t length = charBuffer.length();

  // One extra slot for the terminating null.
  *destination = cx->make_pod_array<char16_t>(length + 1);
  if (!*destination) {
    return false;
  }

  char16_t* const copy = destination->get();
  std::copy(charBuffer.begin(), charBuffer.end(), copy);
  copy[length] = '\0';
  return true;
}
// If the upcoming source text is |directive|, consume it and the value that
// follows (up to whitespace, EOF, an encoding error, or -- inside a
// multi-line comment -- the closing "*/"), and store a copy of the value in
// |*destination|.  A missing directive or an empty value is not an error.
// Returns false only when a warning, buffer append, or copy fails.
template <typename Unit, class AnyCharsAccess>
MOZ_MUST_USE bool TokenStreamSpecific<Unit, AnyCharsAccess>::getDirective(
    bool isMultiline, bool shouldWarnDeprecated, const char* directive,
    uint8_t directiveLength, const char* errorMsgPragma,
    UniquePtr<char16_t[], JS::FreePolicy>* destination) {
  if (!this->sourceUnits.matchCodeUnits(directive, directiveLength)) {
    return true;
  }

  if (shouldWarnDeprecated &&
      !warning(JSMSG_DEPRECATED_PRAGMA, errorMsgPragma)) {
    return false;
  }

  this->charBuffer.clear();

  while (true) {
    int32_t unit = peekCodeUnit();
    if (unit == EOF) {
      break;
    }

    if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
      // Non-ASCII: work with the whole code point.  Encoding errors just
      // end the value here.
      PeekedCodePoint<Unit> peeked = this->sourceUnits.peekCodePoint();
      if (peeked.isNone() || unicode::IsSpaceOrBOM2(peeked.codePoint())) {
        break;
      }

      MOZ_ASSERT(!IsLineTerminator(peeked.codePoint()),
                 "!IsSpaceOrBOM2 must imply !IsLineTerminator or else we'll "
                 "fail to maintain line-info/flags for EOL");
      this->sourceUnits.consumeKnownCodePoint(peeked);

      if (!appendCodePointToCharBuffer(peeked.codePoint())) {
        return false;
      }
      continue;
    }

    if (unicode::IsSpaceOrBOM2(unit)) {
      break;
    }

    consumeKnownCodeUnit(unit);

    // In a multi-line comment the value also stops at "*/"; give the '*'
    // back so the caller sees the comment terminator.
    if (isMultiline && unit == '*' && peekCodeUnit() == '/') {
      ungetCodeUnit('*');
      break;
    }

    if (!this->charBuffer.append(unit)) {
      return false;
    }
  }

  // The directive was present but had no value.
  if (this->charBuffer.empty()) {
    return true;
  }

  return copyCharBufferTo(anyCharsAccess().cx, destination);
}
// Look for a " sourceURL=" directive at the current position and, if found,
// record its value as the display URL.
template <typename Unit, class AnyCharsAccess>
bool TokenStreamSpecific<Unit, AnyCharsAccess>::getDisplayURL(
    bool isMultiline, bool shouldWarnDeprecated) {
  static const char kDirective[] = " sourceURL=";
  // Length excludes the array's terminating null.
  constexpr uint8_t kDirectiveLength = ArrayLength(kDirective) - 1;

  auto* destination = &anyCharsAccess().displayURL_;
  return getDirective(isMultiline, shouldWarnDeprecated, kDirective,
                      kDirectiveLength, "sourceURL", destination);
}
// Look for a " sourceMappingURL=" directive at the current position and, if
// found, record its value as the source map URL.
template <typename Unit, class AnyCharsAccess>
bool TokenStreamSpecific<Unit, AnyCharsAccess>::getSourceMappingURL(
    bool isMultiline, bool shouldWarnDeprecated) {
  static const char kDirective[] = " sourceMappingURL=";
  // Length excludes the array's terminating null.
  constexpr uint8_t kDirectiveLength = ArrayLength(kDirective) - 1;

  auto* destination = &anyCharsAccess().sourceMapURL_;
  return getDirective(isMultiline, shouldWarnDeprecated, kDirective,
                      kDirectiveLength, "sourceMappingURL", destination);
}
// Allocate the next token, give it kind |kind| and the source range from
// |start| to the current position, report |kind| through |*out|, and mark
// the current line as dirty.  Returns the new token.
template <typename Unit, class AnyCharsAccess>
MOZ_ALWAYS_INLINE Token*
GeneralTokenStreamChars<Unit, AnyCharsAccess>::newTokenInternal(
    TokenKind kind, TokenStart start, TokenKind* out) {
  MOZ_ASSERT(kind < TokenKind::Limit);
  MOZ_ASSERT(kind != TokenKind::Eol,
             "TokenKind::Eol should never be used in an actual Token, only "
             "returned by peekTokenSameLine()");

  TokenStreamAnyChars& anyChars = anyCharsAccess();
  anyChars.flags.isDirtyLine = true;

  Token* const token = anyChars.allocateToken();
  token->type = kind;
  *out = token->type;

  token->pos = TokenPos(start.offset(), this->sourceUnits.offset());
  MOZ_ASSERT(token->pos.begin <= token->pos.end);

  return token;
}
// Record that tokenizing hit an error and poison the source units (in debug
// builds).  Always returns false so callers can write |return badToken();|.
template <typename Unit, class AnyCharsAccess>
MOZ_COLD bool GeneralTokenStreamChars<Unit, AnyCharsAccess>::badToken() {
  TokenStreamAnyChars& anyChars = anyCharsAccess();
  anyChars.flags.hadError = true;

  this->sourceUnits.poisonInDebug();
  return false;
}
// Append |codePoint| to |charBuffer| as one or two UTF-16 code units.
// Returns false if an append fails.
MOZ_MUST_USE bool TokenStreamCharsShared::appendCodePointToCharBuffer(
    uint32_t codePoint) {
  char16_t units[2];
  unsigned numUnits = 0;
  unicode::UTF16Encode(codePoint, units, &numUnits);

  MOZ_ASSERT(numUnits == 1 || numUnits == 2,
             "UTF-16 code points are only encoded in one or two units");

  // The second unit is appended only when there is one and the first
  // append succeeded.
  return charBuffer.append(units[0]) &&
         (numUnits == 1 || charBuffer.append(units[1]));
}
// Re-scan the identifier that begins at |identStart| and store its text in
// |charBuffer| with every Unicode escape replaced by the code point it
// denotes.  The current read position is restored before returning, on
// every path.  Returns false if a buffer append fails or a non-ASCII code
// point cannot be read.
//
// |identStart| may point at the '#' of a private name (see the assertion in
// |identifierName|).  '#' is not an IdentifierPart, so it is accepted
// explicitly -- but only as the very first code unit.  Without this the
// loop would stop immediately and an escaped private name such as
// |#\u0061| would be stored as the empty string.
template <typename Unit, class AnyCharsAccess>
bool TokenStreamSpecific<Unit, AnyCharsAccess>::putIdentInCharBuffer(
    const Unit* identStart) {
  const Unit* const originalAddress = this->sourceUnits.addressOfNextCodeUnit();
  this->sourceUnits.setAddressOfNextCodeUnit(identStart);

  // Undo all the reads below, however this function exits.
  auto restoreNextRawCharAddress = MakeScopeExit([this, originalAddress]() {
    this->sourceUnits.setAddressOfNextCodeUnit(originalAddress);
  });

  this->charBuffer.clear();

  bool atIdentStart = true;
  do {
    int32_t unit = getCodeUnit();
    if (unit == EOF) {
      break;
    }

    // Only the first code unit may be the '#' of a private name.
    const bool leadingHash = atIdentStart && unit == '#';
    atIdentStart = false;

    uint32_t codePoint;
    if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
      if (leadingHash || unicode::IsIdentifierPart(char16_t(unit))) {
        if (!this->charBuffer.append(unit)) {
          return false;
        }
        continue;
      }

      // The only other ASCII text allowed in an identifier is an escape
      // that denotes an identifier part.
      if (unit != '\\' || !matchUnicodeEscapeIdent(&codePoint)) {
        break;
      }
    } else {
      char32_t cp;
      if (!getNonAsciiCodePointDontNormalize(toUnit(unit), &cp)) {
        return false;
      }
      codePoint = cp;
      if (!unicode::IsIdentifierPart(codePoint)) {
        break;
      }
    }

    if (!appendCodePointToCharBuffer(codePoint)) {
      return false;
    }
  } while (true);

  return true;
}
// Finish tokenizing an identifier whose first code point has already been
// consumed.  |identStart| points at the identifier's first code unit and
// |escaping| says whether an escape has been seen so far.  Produces a
// reserved-word token, a name token, or a private-name token through |out|.
// Returns false on failure.
template <typename Unit, class AnyCharsAccess>
MOZ_MUST_USE bool TokenStreamSpecific<Unit, AnyCharsAccess>::identifierName(
    TokenStart start, const Unit* identStart, IdentifierEscapes escaping,
    Modifier modifier, NameVisibility visibility, TokenKind* out) {
  // Until released, any return marks the token as bad.
  auto markBadOnExit = MakeScopeExit([this]() { this->badToken(); });

  // Consume the remaining identifier parts.
  for (;;) {
    int32_t unit = peekCodeUnit();
    if (unit == EOF) {
      break;
    }

    if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
      // Encoding errors simply end the identifier here.
      PeekedCodePoint<Unit> peeked = this->sourceUnits.peekCodePoint();
      if (peeked.isNone() || !unicode::IsIdentifierPart(peeked.codePoint())) {
        break;
      }

      MOZ_ASSERT(!IsLineTerminator(peeked.codePoint()),
                 "IdentifierPart must guarantee !IsLineTerminator or "
                 "else we'll fail to maintain line-info/flags for EOL");
      this->sourceUnits.consumeKnownCodePoint(peeked);
      continue;
    }

    consumeKnownCodeUnit(unit);
    if (MOZ_LIKELY(unicode::IsIdentifierPart(static_cast<char16_t>(unit)))) {
      continue;
    }

    // Not a plain identifier part: it must be an escape denoting one, or
    // else the identifier ended just before it.
    uint32_t codePoint;
    if (unit != '\\' || !matchUnicodeEscapeIdent(&codePoint)) {
      ungetCodeUnit(unit);
      break;
    }

    escaping = IdentifierEscapes::SawUnicodeEscape;
  }

  JSAtom* atom;
  if (MOZ_UNLIKELY(escaping == IdentifierEscapes::SawUnicodeEscape)) {
    // Escapes must be decoded into |charBuffer| before atomizing.
    if (!putIdentInCharBuffer(identStart)) {
      return false;
    }

    atom = drainCharBufferIntoAtom(anyCharsAccess().cx);
  } else {
    // No escapes: the identifier's text can be read straight from source.
    const Unit* const chars = identStart;
    const size_t length =
        this->sourceUnits.addressOfNextCodeUnit() - identStart;

    // Only public names are looked up as reserved words.
    if (visibility == NameVisibility::Public) {
      if (const ReservedWordInfo* rw = FindReservedWord(chars, length)) {
        markBadOnExit.release();
        newSimpleToken(rw->tokentype, start, modifier, out);
        return true;
      }
    }

    atom = atomizeSourceChars(anyCharsAccess().cx, MakeSpan(chars, length));
  }

  if (!atom) {
    return false;
  }

  markBadOnExit.release();

  if (visibility != NameVisibility::Private) {
    newNameToken(atom->asPropertyName(), start, modifier, out);
    return true;
  }

  MOZ_ASSERT(identStart[0] == static_cast<Unit>('#'),
             "Private identifier starts with #");
  newPrivateNameToken(atom->asPropertyName(), start, modifier, out);

  if (!anyCharsAccess().options().fieldsEnabledOption) {
    errorAt(start.offset(), JSMSG_FIELDS_NOT_SUPPORTED);
    return false;
  }

  return true;
}
// Classification of an ASCII code unit seen at the start of a token; these
// are the values stored in |firstCharKinds| and dispatched on by
// |getTokenInternal|.
enum FirstCharKind {
// Values in [OneChar_Min, OneChar_Max] are TokenKind values: the tokenizer
// turns such a kind directly into a simple token of that TokenKind.
OneChar_Min = 0,
OneChar_Max = size_t(TokenKind::Limit) - 1,
// The remaining kinds are numbered from TokenKind::Limit upward so they
// cannot collide with a TokenKind value.
Space = size_t(TokenKind::Limit),  // skipped by the tokenizer
Ident,      // start of an identifier
Dec,        // nonzero decimal digit: start of a decimal number
String,     // string or template delimiter
EOL,        // line terminator: '\r' or '\n'
ZeroDigit,  // '0': may start a hex/binary/octal/decimal number
Other,      // anything else; handled by the tokenizer's switch statement
LastCharKind = Other
};
// Short aliases so that each row of the table below stays compact.  The
// T_* aliases are TokenKind values used directly as a one-char kind, and
// |_______| stands for |Other|.  All are #undef'd right after the table.
#define T_COMMA size_t(TokenKind::Comma)
#define T_COLON size_t(TokenKind::Colon)
#define T_BITNOT size_t(TokenKind::BitNot)
#define T_LP size_t(TokenKind::LeftParen)
#define T_RP size_t(TokenKind::RightParen)
#define T_SEMI size_t(TokenKind::Semi)
#define T_HOOK size_t(TokenKind::Hook)
#define T_LB size_t(TokenKind::LeftBracket)
#define T_RB size_t(TokenKind::RightBracket)
#define T_LC size_t(TokenKind::LeftCurly)
#define T_RC size_t(TokenKind::RightCurly)
#define _______ Other
// FirstCharKind for each of the 128 ASCII code units, indexed by code unit
// value.  Ten entries per row; the trailing comment on each row gives the
// range of code unit values it covers.
static const uint8_t firstCharKinds[] = {
_______, _______, _______, _______, _______, _______, _______, _______, _______, Space, //   0-9:   9 is '\t'
EOL, Space, Space, EOL, _______, _______, _______, _______, _______, _______, //  10-19:  '\n', VT, FF, '\r'
_______, _______, _______, _______, _______, _______, _______, _______, _______, _______, //  20-29
_______, _______, Space, _______, String, _______, Ident, _______, _______, String, //  30-39:  32 is ' ', 34 '"', 36 '$', 39 '\''
T_LP, T_RP, _______, _______, T_COMMA, _______, _______, _______,ZeroDigit, Dec, //  40-49:  '(' ')' ... ',' ... '0' '1'
Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, T_COLON, T_SEMI, //  50-59:  '2'-'9' ':' ';'
_______, _______, _______, T_HOOK, _______, Ident, Ident, Ident, Ident, Ident, //  60-69:  63 is '?', 65-69 are 'A'-'E'
Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, //  70-79:  'F'-'O'
Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, //  80-89:  'P'-'Y'
Ident, T_LB, _______, T_RB, _______, Ident, String, Ident, Ident, Ident, //  90-99:  'Z' '[' '\\' ']' '^' '_' '`' 'a'-'c'
Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, // 100-109: 'd'-'m'
Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, // 110-119: 'n'-'w'
Ident, Ident, Ident, T_LC, _______, T_RC,T_BITNOT, _______ // 120-127: 'x'-'z' '{' '|' '}' '~' DEL
};
#undef T_COMMA
#undef T_COLON
#undef T_BITNOT
#undef T_LP
#undef T_RP
#undef T_SEMI
#undef T_HOOK
#undef T_LB
#undef T_RB
#undef T_LC
#undef T_RC
#undef _______
// Every FirstCharKind value must fit in one table element.
static_assert(LastCharKind < (1 << (sizeof(firstCharKinds[0]) * 8)),
"Elements of firstCharKinds[] are too small");
// Consume code units up to, but not including, the next line terminator or
// the end of the source.
template <>
void SourceUnits<char16_t>::consumeRestOfSingleLineComment() {
  while (MOZ_LIKELY(!atEnd())) {
    char16_t next = peekCodeUnit();
    if (IsLineTerminator(next)) {
      break;
    }

    consumeKnownCodeUnit(next);
  }
}
// Consume UTF-8 source up to, but not including, the next line terminator,
// the first encoding error, or the end of the source.
template <>
void SourceUnits<Utf8Unit>::consumeRestOfSingleLineComment() {
  while (MOZ_LIKELY(!atEnd())) {
    const Utf8Unit lead = peekCodeUnit();
    if (IsSingleUnitLineTerminator(lead)) {
      break;
    }

    if (MOZ_UNLIKELY(!IsAscii(lead))) {
      // Multi-unit code point: stop at an encoding error or at U+2028/U+2029,
      // leaving it unconsumed.
      PeekedCodePoint<Utf8Unit> peeked = peekCodePoint();
      if (peeked.isNone()) {
        break;
      }

      const char32_t codePoint = peeked.codePoint();
      if (MOZ_UNLIKELY(codePoint == unicode::LINE_SEPARATOR ||
                       codePoint == unicode::PARA_SEPARATOR)) {
        break;
      }

      consumeKnownCodePoint(peeked);
      continue;
    }

    consumeKnownCodeUnit(lead);
  }
}
// Tokenize the remainder of a decimal literal.  |unit| is the code unit most
// recently read by the caller and |numStart| points at the literal's first
// code unit.  Produces a number token (or a BigInt token when the literal
// ends in 'n' and BigInt is enabled) through |out|.  Returns false on
// failure.
template <typename Unit, class AnyCharsAccess>
MOZ_MUST_USE bool TokenStreamSpecific<Unit, AnyCharsAccess>::decimalNumber(
    int32_t unit, TokenStart start, const Unit* numStart, Modifier modifier,
    TokenKind* out) {
  // Until released, any return marks the token as bad.
  auto markBadOnExit = MakeScopeExit([this]() { this->badToken(); });

  // Skip over the digits of the integral part.
  for (; IsAsciiDigit(unit); unit = getCodeUnit()) {
  }

  double value;
  bool isBigInt = false;
  DecimalPoint decimalPoint = NoDecimal;

  const bool moreToScan =
      unit == '.' || unit == 'e' || unit == 'E' || unit == 'n';
  if (!moreToScan) {
    // A plain run of decimal digits.  NOTE: |unit| may be EOF here.
    ungetCodeUnit(unit);

    if (!GetDecimalInteger(anyCharsAccess().cx, numStart,
                           this->sourceUnits.addressOfNextCodeUnit(),
                           &value)) {
      return false;
    }
  } else if (unit == 'n' && anyCharsAccess().options().bigIntEnabledOption) {
    isBigInt = true;
    unit = peekCodeUnit();
  } else {
    // Optional fractional part.
    if (unit == '.') {
      decimalPoint = HasDecimal;
      unit = getCodeUnit();
      while (IsAsciiDigit(unit)) {
        unit = getCodeUnit();
      }
    }

    // Optional exponent, which needs at least one digit after the sign.
    if (unit == 'e' || unit == 'E') {
      unit = getCodeUnit();
      if (unit == '+' || unit == '-') {
        unit = getCodeUnit();
      }

      if (!IsAsciiDigit(unit)) {
        ungetCodeUnit(unit);
        error(JSMSG_MISSING_EXPONENT);
        return false;
      }

      unit = getCodeUnit();
      while (IsAsciiDigit(unit)) {
        unit = getCodeUnit();
      }
    }

    ungetCodeUnit(unit);

    if (!StringToDouble(anyCharsAccess().cx, numStart,
                        this->sourceUnits.addressOfNextCodeUnit(), &value)) {
      return false;
    }
  }

  // An identifier start directly after the number is an error.
  bool idStartFollows = false;
  if (unit != EOF) {
    if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
      idStartFollows = unicode::IsIdentifierStart(char16_t(unit));
    } else {
      // Encoding errors are not reported here.
      PeekedCodePoint<Unit> peeked = this->sourceUnits.peekCodePoint();
      idStartFollows =
          !peeked.isNone() && unicode::IsIdentifierStart(peeked.codePoint());
    }
  }

  if (idStartFollows) {
    error(JSMSG_IDSTART_AFTER_NUMBER);
    return false;
  }

  markBadOnExit.release();

  if (isBigInt) {
    return bigIntLiteral(start, modifier, out);
  }

  newNumberToken(value, decimalPoint, start, modifier, out);
  return true;
}
// Tokenize a regular expression literal whose opening '/' has just been
// consumed.  The pattern's source text is accumulated in |charBuffer| and
// the flags are parsed after the closing '/'.  Produces a RegExp token
// through |out|.  On failure reports an error, marks the token bad, and
// returns false.
template <typename Unit, class AnyCharsAccess>
MOZ_MUST_USE bool TokenStreamSpecific<Unit, AnyCharsAccess>::regexpLiteral(
    TokenStart start, TokenKind* out) {
  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('/'));
  this->charBuffer.clear();

  // Read the non-ASCII code point starting with |lead| and append it to the
  // buffer.  U+2028/U+2029 end the line and so leave the literal
  // unterminated.
  auto AppendNonAscii = [this](int32_t lead) {
    MOZ_ASSERT(lead != EOF);
    MOZ_ASSERT(!this->isAsciiCodePoint(lead));

    char32_t codePoint;
    if (!this->getNonAsciiCodePointDontNormalize(this->toUnit(lead),
                                                 &codePoint)) {
      return false;
    }

    const bool endsLine = codePoint == unicode::LINE_SEPARATOR ||
                          codePoint == unicode::PARA_SEPARATOR;
    if (MOZ_UNLIKELY(endsLine)) {
      this->sourceUnits.ungetLineOrParagraphSeparator();
      this->error(JSMSG_UNTERMINATED_REGEXP);
      return false;
    }

    return this->appendCodePointToCharBuffer(codePoint);
  };

  // Put |last| back and report that the literal never ended.
  auto FailUnterminated = [this](int32_t last) {
    this->ungetCodeUnit(last);
    this->error(JSMSG_UNTERMINATED_REGEXP);
  };

  // Pattern body: runs until an unescaped '/' outside a character class.
  bool inCharClass = false;
  for (;;) {
    int32_t unit = getCodeUnit();
    if (unit == EOF) {
      FailUnterminated(unit);
      return badToken();
    }

    if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
      if (!AppendNonAscii(unit)) {
        return badToken();
      }
      continue;
    }

    if (unit == '/' && !inCharClass) {
      break;
    }

    if (unit == '[') {
      inCharClass = true;
    } else if (unit == ']') {
      inCharClass = false;
    } else if (unit == '\\') {
      // Keep the backslash, then deal with the code unit it escapes.
      if (!this->charBuffer.append(unit)) {
        return badToken();
      }

      unit = getCodeUnit();
      if (unit == EOF) {
        FailUnterminated(unit);
        return badToken();
      }

      if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
        if (!AppendNonAscii(unit)) {
          return badToken();
        }
        continue;
      }
    }

    // A raw line break, escaped or not, leaves the literal unterminated.
    if (unit == '\r' || unit == '\n') {
      FailUnterminated(unit);
      return badToken();
    }

    MOZ_ASSERT(!IsLineTerminator(AssertedCast<char32_t>(unit)));
    if (!this->charBuffer.append(unit)) {
      return badToken();
    }
  }

  // Flags: each known flag may appear once; any other ASCII letter is an
  // error; anything else ends the flags.
  RegExpFlag reflags = NoFlags;
  int32_t unit;
  for (;;) {
    unit = getCodeUnit();

    RegExpFlag flag = NoFlags;
    switch (unit) {
      case 'g':
        flag = GlobalFlag;
        break;
      case 'i':
        flag = IgnoreCaseFlag;
        break;
      case 'm':
        flag = MultilineFlag;
        break;
      case 'y':
        flag = StickyFlag;
        break;
      case 'u':
        flag = UnicodeFlag;
        break;
      default:
        break;
    }

    if (flag == NoFlags && !IsAsciiAlpha(unit)) {
      break;
    }

    if (flag == NoFlags || (reflags & flag)) {
      ungetCodeUnit(unit);

      char buf[2] = {char(unit), '\0'};
      error(JSMSG_BAD_REGEXP_FLAG, buf);
      return badToken();
    }

    reflags = RegExpFlag(reflags | flag);
  }
  ungetCodeUnit(unit);

  newRegExpToken(reflags, start, out);
  return true;
}
// Create a BigInt token for the literal spanning |start| to the current
// position, whose last code unit is the 'n' suffix.  Everything except that
// suffix is copied into |charBuffer|.  Returns false if an append fails.
template <typename Unit, class AnyCharsAccess>
MOZ_MUST_USE bool TokenStreamSpecific<Unit, AnyCharsAccess>::bigIntLiteral(
    TokenStart start, Modifier modifier, TokenKind* out) {
  MOZ_ASSERT(anyCharsAccess().options().bigIntEnabledOption);
  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == toUnit('n'));
  MOZ_ASSERT(this->sourceUnits.offset() > start.offset());

  const uint32_t length = this->sourceUnits.offset() - start.offset();
  MOZ_ASSERT(length >= 2);

  this->charBuffer.clear();

  mozilla::Range<const Unit> literal(
      this->sourceUnits.codeUnitPtrAt(start.offset()), length);

  // Copy everything but the trailing 'n'.
  const uint32_t copyCount = length - 1;
  for (uint32_t i = 0; i < copyCount; i++) {
    int32_t unit = CodeUnitValue(literal[i]);
    MOZ_ASSERT(isAsciiCodePoint(unit));
    if (!this->appendCodePointToCharBuffer(unit)) {
      return false;
    }
  }

  newBigIntToken(start, modifier, out);
  return true;
}
// If the source begins with "#!", consume that line as a comment.
// Otherwise leave the source position unchanged.
template <typename Unit, class AnyCharsAccess>
void GeneralTokenStreamChars<Unit,
                             AnyCharsAccess>::consumeOptionalHashbangComment() {
  MOZ_ASSERT(this->sourceUnits.atStart(),
             "HashBangComment can only appear immediately at the start of a "
             "Script or Module");

  if (matchCodeUnit('#')) {
    if (matchCodeUnit('!')) {
      this->sourceUnits.consumeRestOfSingleLineComment();
    } else {
      // A lone '#': give it back.
      ungetCodeUnit('#');
    }
  }
}
// Scan the next token from the source, skipping whitespace and comments.
//
// |modifier| selects context-sensitive behavior: |TemplateTail| resumes a
// template literal, and |Operand| makes a '/' start a regular expression
// literal instead of a division operator.
//
// On success the token's kind is stored in |*ttp| and true is returned.  On
// failure an error has been reported and false is returned.
template <typename Unit, class AnyCharsAccess>
MOZ_MUST_USE bool TokenStreamSpecific<Unit, AnyCharsAccess>::getTokenInternal(
    TokenKind* const ttp, const Modifier modifier) {
  // Assume failure: every success path overwrites |*ttp|.
#ifdef DEBUG
  *ttp = TokenKind::Limit;
#endif
  MOZ_MAKE_MEM_UNDEFINED(ttp, sizeof(*ttp));

  // Continuing a template literal takes precedence over everything else.
  if (MOZ_UNLIKELY(modifier == TemplateTail)) {
    return getStringOrTemplateToken('`', modifier, ttp);
  }

  // This loop iterates more than once only when whitespace or a comment is
  // skipped (the |continue| statements below).
  do {
    int32_t unit = peekCodeUnit();
    if (MOZ_UNLIKELY(unit == EOF)) {
      MOZ_ASSERT(this->sourceUnits.atEnd());
      anyCharsAccess().flags.isEOF = true;
      TokenStart start(this->sourceUnits, 0);
      newSimpleToken(TokenKind::Eof, start, modifier, ttp);
      return true;
    }

    if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
      // A non-ASCII code point is either whitespace, the start of an
      // identifier, or an error.
      TokenStart start(this->sourceUnits, 0);
      const Unit* identStart = this->sourceUnits.addressOfNextCodeUnit();

      PeekedCodePoint<Unit> peeked = this->sourceUnits.peekCodePoint();
      if (peeked.isNone()) {
        // Encoding error: re-read the code point the failing way so that
        // the error gets reported.
        int32_t bad;
        MOZ_ALWAYS_FALSE(getCodePoint(&bad));
        return badToken();
      }

      char32_t cp = peeked.codePoint();
      if (unicode::IsSpaceOrBOM2(cp)) {
        this->sourceUnits.consumeKnownCodePoint(peeked);
        if (IsLineTerminator(cp)) {
          if (!updateLineInfoForEOL()) {
            return badToken();
          }

          anyCharsAccess().updateFlagsForEOL();
        }

        continue;
      }

      static_assert(isAsciiCodePoint('$'),
                    "IdentifierStart contains '$', but as "
                    "!IsUnicodeIDStart('$'), ensure that '$' is never "
                    "handled here");
      static_assert(isAsciiCodePoint('_'),
                    "IdentifierStart contains '_', but as "
                    "!IsUnicodeIDStart('_'), ensure that '_' is never "
                    "handled here");

      if (MOZ_LIKELY(unicode::IsUnicodeIDStart(cp))) {
        this->sourceUnits.consumeKnownCodePoint(peeked);
        MOZ_ASSERT(!IsLineTerminator(cp),
                   "IdentifierStart must guarantee !IsLineTerminator "
                   "or else we'll fail to maintain line-info/flags "
                   "for EOL here");

        return identifierName(start, identStart, IdentifierEscapes::None,
                              modifier, NameVisibility::Public, ttp);
      }

      error(JSMSG_ILLEGAL_CHARACTER);
      return badToken();
    }  // !isAsciiCodePoint(unit)

    consumeKnownCodeUnit(unit);

    // Classify the token by its first (ASCII) code unit.
    FirstCharKind c1kind = FirstCharKind(firstCharKinds[unit]);

    // A one-char kind is itself the TokenKind of a complete token.
    if (c1kind <= OneChar_Max) {
      TokenStart start(this->sourceUnits, -1);
      newSimpleToken(TokenKind(c1kind), start, modifier, ttp);
      return true;
    }

    // Skip non-EOL whitespace.
    if (c1kind == Space) {
      continue;
    }

    // Identifier (or reserved word).
    if (c1kind == Ident) {
      TokenStart start(this->sourceUnits, -1);
      return identifierName(
          start, this->sourceUnits.addressOfNextCodeUnit() - 1,
          IdentifierEscapes::None, modifier, NameVisibility::Public, ttp);
    }

    // Decimal number starting with a nonzero digit.
    if (c1kind == Dec) {
      TokenStart start(this->sourceUnits, -1);
      const Unit* numStart = this->sourceUnits.addressOfNextCodeUnit() - 1;
      return decimalNumber(unit, start, numStart, modifier, ttp);
    }

    // String or template literal.
    if (c1kind == String) {
      return getStringOrTemplateToken(static_cast<char>(unit), modifier, ttp);
    }

    // Line terminator: "\r\n" counts as a single one.
    if (c1kind == EOL) {
      if (unit == '\r') {
        matchLineTerminator('\n');
      }

      if (!updateLineInfoForEOL()) {
        return badToken();
      }

      anyCharsAccess().updateFlagsForEOL();
      continue;
    }

    // A leading '0': hexadecimal, binary, octal, legacy octal, or a decimal
    // number after all.
    if (c1kind == ZeroDigit) {
      TokenStart start(this->sourceUnits, -1);
      int radix;
      bool isLegacyOctalOrNoctal = false;
      bool isBigInt = false;
      const Unit* numStart;
      unit = getCodeUnit();
      if (unit == 'x' || unit == 'X') {
        radix = 16;
        unit = getCodeUnit();
        if (!JS7_ISHEX(unit)) {
          // NOTE: |unit| may be EOF here.
          ungetCodeUnit(unit);
          error(JSMSG_MISSING_HEXDIGITS);
          return badToken();
        }

        // First digit after the "0x".
        numStart = this->sourceUnits.addressOfNextCodeUnit() - 1;

        while (JS7_ISHEX(unit)) {
          unit = getCodeUnit();
        }
      } else if (unit == 'b' || unit == 'B') {
        radix = 2;
        unit = getCodeUnit();
        if (unit != '0' && unit != '1') {
          // NOTE: |unit| may be EOF here.
          ungetCodeUnit(unit);
          error(JSMSG_MISSING_BINARY_DIGITS);
          return badToken();
        }

        // First digit after the "0b".
        numStart = this->sourceUnits.addressOfNextCodeUnit() - 1;

        while (unit == '0' || unit == '1') {
          unit = getCodeUnit();
        }
      } else if (unit == 'o' || unit == 'O') {
        radix = 8;
        unit = getCodeUnit();
        if (!JS7_ISOCT(unit)) {
          // NOTE: |unit| may be EOF here.
          ungetCodeUnit(unit);
          error(JSMSG_MISSING_OCTAL_DIGITS);
          return badToken();
        }

        // First digit after the "0o".
        numStart = this->sourceUnits.addressOfNextCodeUnit() - 1;

        while (JS7_ISOCT(unit)) {
          unit = getCodeUnit();
        }
      } else if (IsAsciiDigit(unit)) {
        radix = 8;
        isLegacyOctalOrNoctal = true;
        // First digit after the '0'.
        numStart = this->sourceUnits.addressOfNextCodeUnit() - 1;

        do {
          // Reported as an error in strict mode code.
          if (!strictModeError(JSMSG_DEPRECATED_OCTAL)) {
            return badToken();
          }

          // An '8' or '9' makes this a decimal number; warn and hand the
          // rest of the literal to the decimal scanner.
          if (unit >= '8') {
            if (!warning(JSMSG_BAD_OCTAL, unit == '8' ? "08" : "09")) {
              return badToken();
            }

            return decimalNumber(unit, start, numStart, modifier, ttp);
          }

          unit = getCodeUnit();
        } while (IsAsciiDigit(unit));
      } else {
        // '0' not followed by [XxBbOo0-9]: scan as a decimal number.
        // NOTE: |unit| may be EOF here.
        numStart = this->sourceUnits.addressOfNextCodeUnit() - 1;
        return decimalNumber(unit, start, numStart, modifier, ttp);
      }

      if (unit == 'n' && anyCharsAccess().options().bigIntEnabledOption) {
        if (isLegacyOctalOrNoctal) {
          error(JSMSG_BIGINT_INVALID_SYNTAX);
          return badToken();
        }
        isBigInt = true;
        unit = peekCodeUnit();
      } else {
        ungetCodeUnit(unit);
      }

      // An identifier start directly after the number is an error.
      if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
        if (unicode::IsIdentifierStart(char16_t(unit))) {
          error(JSMSG_IDSTART_AFTER_NUMBER);
          return badToken();
        }
      } else if (MOZ_LIKELY(unit != EOF)) {
        // Encoding errors are not reported here.
        PeekedCodePoint<Unit> peeked = this->sourceUnits.peekCodePoint();
        if (!peeked.isNone() &&
            unicode::IsIdentifierStart(peeked.codePoint())) {
          error(JSMSG_IDSTART_AFTER_NUMBER);
          return badToken();
        }
      }

      if (isBigInt) {
        return bigIntLiteral(start, modifier, ttp);
      }

      double dval;
      if (!GetFullInteger(anyCharsAccess().cx, numStart,
                          this->sourceUnits.addressOfNextCodeUnit(), radix,
                          &dval)) {
        return badToken();
      }
      newNumberToken(dval, NoDecimal, start, modifier, ttp);
      return true;
    }

    MOZ_ASSERT(c1kind == Other);

    // Everything else.  Tokens identified solely by their TokenKind set
    // |simpleKind| and |break| to share the token-creation code after the
    // switch; every other case returns or continues the enclosing loop.
    TokenStart start(this->sourceUnits, -1);
    TokenKind simpleKind;
    // NOTE: |#endif| must sit on its own line.  A '#' that follows other
    // tokens on a line does not start a preprocessing directive, so
    // appending it to the assignment would leave this |#ifdef| unterminated.
#ifdef DEBUG
    simpleKind = TokenKind::Limit;  // sentinel checked after the switch
#endif

    // Non-ASCII was handled above, so the value fits in a uint8_t.
    switch (AssertedCast<uint8_t>(CodeUnitValue(toUnit(unit)))) {
      case '.':
        unit = getCodeUnit();
        if (IsAsciiDigit(unit)) {
          // ".5"-style number; |numStart| points at the '.'.
          return decimalNumber('.', start,
                               this->sourceUnits.addressOfNextCodeUnit() - 2,
                               modifier, ttp);
        }

        if (unit == '.') {
          if (matchCodeUnit('.')) {
            simpleKind = TokenKind::TripleDot;
            break;
          }
        }

        // NOTE: |unit| may be EOF here.
        ungetCodeUnit(unit);

        simpleKind = TokenKind::Dot;
        break;

      case '#': {
        // Private name: '#' followed by an identifier.
        TokenStart start(this->sourceUnits, -1);
        const Unit* identStart = this->sourceUnits.addressOfNextCodeUnit() - 1;
        IdentifierEscapes sawEscape;
        if (!matchIdentifierStart(&sawEscape)) {
          return badToken();
        }
        return identifierName(start, identStart, sawEscape, modifier,
                              NameVisibility::Private, ttp);
      }

      case '=':
        if (matchCodeUnit('=')) {
          simpleKind = matchCodeUnit('=') ? TokenKind::StrictEq : TokenKind::Eq;
        } else if (matchCodeUnit('>')) {
          simpleKind = TokenKind::Arrow;
        } else {
          simpleKind = TokenKind::Assign;
        }
        break;

      case '+':
        if (matchCodeUnit('+')) {
          simpleKind = TokenKind::Inc;
        } else {
          simpleKind =
              matchCodeUnit('=') ? TokenKind::AddAssign : TokenKind::Add;
        }
        break;

      case '\\': {
        // An identifier that begins with a Unicode escape.
        uint32_t codePoint;
        if (uint32_t escapeLength = matchUnicodeEscapeIdStart(&codePoint)) {
          return identifierName(
              start,
              this->sourceUnits.addressOfNextCodeUnit() - escapeLength - 1,
              IdentifierEscapes::SawUnicodeEscape, modifier,
              NameVisibility::Public, ttp);
        }

        // Point the error at the start of the invalid escape.
        ungetCodeUnit('\\');
        error(JSMSG_BAD_ESCAPE);
        return badToken();
      }

      case '|':
        if (matchCodeUnit('|')) {
          simpleKind = TokenKind::Or;
#ifdef ENABLE_PIPELINE_OPERATOR
        } else if (matchCodeUnit('>')) {
          simpleKind = TokenKind::Pipeline;
#endif
        } else {
          simpleKind =
              matchCodeUnit('=') ? TokenKind::BitOrAssign : TokenKind::BitOr;
        }
        break;

      case '^':
        simpleKind =
            matchCodeUnit('=') ? TokenKind::BitXorAssign : TokenKind::BitXor;
        break;

      case '&':
        if (matchCodeUnit('&')) {
          simpleKind = TokenKind::And;
        } else {
          simpleKind =
              matchCodeUnit('=') ? TokenKind::BitAndAssign : TokenKind::BitAnd;
        }
        break;

      case '!':
        if (matchCodeUnit('=')) {
          simpleKind = matchCodeUnit('=') ? TokenKind::StrictNe : TokenKind::Ne;
        } else {
          simpleKind = TokenKind::Not;
        }
        break;

      case '<':
        if (anyCharsAccess().options().allowHTMLComments) {
          // Treat "<!--" as a comment running to the end of the line.
          if (matchCodeUnit('!')) {
            if (matchCodeUnit('-')) {
              if (matchCodeUnit('-')) {
                this->sourceUnits.consumeRestOfSingleLineComment();
                continue;
              }
              ungetCodeUnit('-');
            }
            ungetCodeUnit('!');
          }
        }
        if (matchCodeUnit('<')) {
          simpleKind =
              matchCodeUnit('=') ? TokenKind::LshAssign : TokenKind::Lsh;
        } else {
          simpleKind = matchCodeUnit('=') ? TokenKind::Le : TokenKind::Lt;
        }
        break;

      case '>':
        if (matchCodeUnit('>')) {
          if (matchCodeUnit('>')) {
            simpleKind =
                matchCodeUnit('=') ? TokenKind::UrshAssign : TokenKind::Ursh;
          } else {
            simpleKind =
                matchCodeUnit('=') ? TokenKind::RshAssign : TokenKind::Rsh;
          }
        } else {
          simpleKind = matchCodeUnit('=') ? TokenKind::Ge : TokenKind::Gt;
        }
        break;

      case '*':
        if (matchCodeUnit('*')) {
          simpleKind =
              matchCodeUnit('=') ? TokenKind::PowAssign : TokenKind::Pow;
        } else {
          simpleKind =
              matchCodeUnit('=') ? TokenKind::MulAssign : TokenKind::Mul;
        }
        break;

      case '/':
        // Single-line comment.
        if (matchCodeUnit('/')) {
          unit = getCodeUnit();
          if (unit == '@' || unit == '#') {
            // "//@" is the deprecated spelling of "//#".
            bool shouldWarn = unit == '@';
            if (!getDirectives(false, shouldWarn)) {
              // |getDirectives| has already marked the token as bad.
              return false;
            }
          } else {
            // NOTE: |unit| may be EOF here.
            ungetCodeUnit(unit);
          }

          this->sourceUnits.consumeRestOfSingleLineComment();
          continue;
        }

        // Multi-line comment.
        if (matchCodeUnit('*')) {
          TokenStreamAnyChars& anyChars = anyCharsAccess();
          unsigned linenoBefore = anyChars.lineno;

          do {
            int32_t unit = getCodeUnit();
            if (unit == EOF) {
              error(JSMSG_UNTERMINATED_COMMENT);
              return badToken();
            }

            if (unit == '*' && matchCodeUnit('/')) {
              break;
            }

            if (unit == '@' || unit == '#') {
              bool shouldWarn = unit == '@';
              if (!getDirectives(true, shouldWarn)) {
                return badToken();
              }
            } else if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
              int32_t codePoint;
              if (!getFullAsciiCodePoint(unit, &codePoint)) {
                return badToken();
              }
            } else {
              int32_t codePoint;
              if (!getNonAsciiCodePoint(unit, &codePoint)) {
                return badToken();
              }
            }
          } while (true);

          // The comment spanned at least one line break.
          if (linenoBefore != anyChars.lineno) {
            anyChars.updateFlagsForEOL();
          }

          continue;
        }

        // Where an operand is expected, '/' starts a regular expression.
        if (modifier == Operand) {
          return regexpLiteral(start, ttp);
        }

        simpleKind = matchCodeUnit('=') ? TokenKind::DivAssign : TokenKind::Div;
        break;

      case '%':
        simpleKind = matchCodeUnit('=') ? TokenKind::ModAssign : TokenKind::Mod;
        break;

      case '-':
        if (matchCodeUnit('-')) {
          // "-->" is a comment to end of line only when HTML comments are
          // allowed and no token precedes it on the line.
          if (anyCharsAccess().options().allowHTMLComments &&
              !anyCharsAccess().flags.isDirtyLine) {
            if (matchCodeUnit('>')) {
              this->sourceUnits.consumeRestOfSingleLineComment();
              continue;
            }
          }

          simpleKind = TokenKind::Dec;
        } else {
          simpleKind =
              matchCodeUnit('=') ? TokenKind::SubAssign : TokenKind::Sub;
        }
        break;

      default:
        // Put the bad code unit back so the error location points at it.
        ungetCodeUnit(unit);
        error(JSMSG_ILLEGAL_CHARACTER);
        return badToken();
    }  // switch (AssertedCast<uint8_t>(CodeUnitValue(toUnit(unit))))

    MOZ_ASSERT(simpleKind != TokenKind::Limit,
               "switch-statement should have set |simpleKind| before "
               "breaking");

    newSimpleToken(simpleKind, start, modifier, ttp);
    return true;
  } while (true);
}
template <typename Unit, class AnyCharsAccess>
bool TokenStreamSpecific<Unit, AnyCharsAccess>::getStringOrTemplateToken(
char untilChar, Modifier modifier, TokenKind* out) {
MOZ_ASSERT(untilChar == '\'' || untilChar == '"' || untilChar == '`',
"unexpected string/template literal delimiter");
bool parsingTemplate = (untilChar == '`');
bool templateHead = false;
TokenStart start(this->sourceUnits, -1);
this->charBuffer.clear();
auto noteBadToken = MakeScopeExit([this]() { this->badToken(); });
auto ReportPrematureEndOfLiteral = [this, untilChar](unsigned errnum) {
MOZ_ASSERT(this->sourceUnits.atEnd() ||
this->sourceUnits.peekCodeUnit() == Unit('\r') ||
this->sourceUnits.peekCodeUnit() == Unit('\n'),
"must be parked at EOF or EOL to call this function");
const char delimiters[] = {untilChar, untilChar, '\0'};
this->error(errnum, delimiters);
return;
};
int32_t unit;
while ((unit = getCodeUnit()) != untilChar) {
if (unit == EOF) {
ReportPrematureEndOfLiteral(JSMSG_EOF_BEFORE_END_OF_LITERAL);
return false;
}
if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
char32_t cp;
if (!getNonAsciiCodePointDontNormalize(toUnit(unit), &cp)) {
return false;
}
if (MOZ_UNLIKELY(cp == unicode::LINE_SEPARATOR ||
cp == unicode::PARA_SEPARATOR)) {
if (!updateLineInfoForEOL()) {
return false;
}
anyCharsAccess().updateFlagsForEOL();
} else {
MOZ_ASSERT(!IsLineTerminator(cp));
}
if (!appendCodePointToCharBuffer(cp)) {
return false;
}
continue;
}
if (unit == '\\') {
unit = getCodeUnit();
if (unit == EOF) {
ReportPrematureEndOfLiteral(JSMSG_EOF_IN_ESCAPE_IN_LITERAL);
return false;
}
if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
int32_t codePoint;
if (!getNonAsciiCodePoint(unit, &codePoint)) {
return false;
}
if (codePoint != '\n') {
if (!appendCodePointToCharBuffer(AssertedCast<char32_t>(codePoint))) {
return false;
}
}
continue;
}
switch (AssertedCast<uint8_t>(CodeUnitValue(toUnit(unit)))) {
case 'b':
unit = '\b';
break;
case 'f':
unit = '\f';
break;
case 'n':
unit = '\n';
break;
case 'r':
unit = '\r';
break;
case 't':
unit = '\t';
break;
case 'v':
unit = '\v';
break;
case '\r':
matchLineTerminator('\n');
MOZ_FALLTHROUGH;
case '\n': {
if (!updateLineInfoForEOL()) {
return false;
}
continue;
}
case 'u': {
int32_t c2 = getCodeUnit();
if (c2 == EOF) {
ReportPrematureEndOfLiteral(JSMSG_EOF_IN_ESCAPE_IN_LITERAL);
return false;
}
if (c2 == '{') {
uint32_t start = this->sourceUnits.offset() - 3;
uint32_t code = 0;
bool first = true;
bool valid = true;
do {
int32_t u3 = getCodeUnit();
if (u3 == EOF) {
if (parsingTemplate) {
TokenStreamAnyChars& anyChars = anyCharsAccess();
anyChars.setInvalidTemplateEscape(start,
InvalidEscapeType::Unicode);
valid = false;
break;
}
reportInvalidEscapeError(start, InvalidEscapeType::Unicode);
return false;
}
if (u3 == '}') {
if (first) {
if (parsingTemplate) {
TokenStreamAnyChars& anyChars = anyCharsAccess();
anyChars.setInvalidTemplateEscape(
start, InvalidEscapeType::Unicode);
valid = false;
break;
}
reportInvalidEscapeError(start, InvalidEscapeType::Unicode);
return false;
}
break;
}
if (!JS7_ISHEX(u3)) {
if (parsingTemplate) {
ungetCodeUnit(u3);
TokenStreamAnyChars& anyChars = anyCharsAccess();
anyChars.setInvalidTemplateEscape(start,
InvalidEscapeType::Unicode);
valid = false;
break;
}
reportInvalidEscapeError(start, InvalidEscapeType::Unicode);
return false;
}
code = (code << 4) | JS7_UNHEX(u3);
if (code > unicode::NonBMPMax) {
if (parsingTemplate) {
TokenStreamAnyChars& anyChars = anyCharsAccess();
anyChars.setInvalidTemplateEscape(
start + 3, InvalidEscapeType::UnicodeOverflow);
valid = false;
break;
}
reportInvalidEscapeError(start + 3,
InvalidEscapeType::UnicodeOverflow);
return false;
}
first = false;
} while (true);
if (!valid) {
continue;
}
MOZ_ASSERT(code <= unicode::NonBMPMax);
if (!appendCodePointToCharBuffer(code)) {
return false;
}
continue;
}
char16_t v;
if (JS7_ISHEX(c2) && this->sourceUnits.matchHexDigits(3, &v)) {
unit = (JS7_UNHEX(c2) << 12) | v;
} else {
ungetCodeUnit(c2);
uint32_t start = this->sourceUnits.offset() - 2;
if (parsingTemplate) {
TokenStreamAnyChars& anyChars = anyCharsAccess();
anyChars.setInvalidTemplateEscape(start,
InvalidEscapeType::Unicode);
continue;
}
reportInvalidEscapeError(start, InvalidEscapeType::Unicode);
return false;
}
break;
}
case 'x': {
char16_t v;
if (this->sourceUnits.matchHexDigits(2, &v)) {
unit = v;
} else {
uint32_t start = this->sourceUnits.offset() - 2;
if (parsingTemplate) {
TokenStreamAnyChars& anyChars = anyCharsAccess();
anyChars.setInvalidTemplateEscape(start,
InvalidEscapeType::Hexadecimal);
continue;
}
reportInvalidEscapeError(start, InvalidEscapeType::Hexadecimal);
return false;
}
break;
}
default: {
if (!JS7_ISOCT(unit)) {
break;
}
int32_t val = JS7_UNOCT(unit);
unit = peekCodeUnit();
if (MOZ_UNLIKELY(unit == EOF)) {
ReportPrematureEndOfLiteral(JSMSG_EOF_IN_ESCAPE_IN_LITERAL);
return false;
}
if (val != 0 || IsAsciiDigit(unit)) {
TokenStreamAnyChars& anyChars = anyCharsAccess();
if (parsingTemplate) {
anyChars.setInvalidTemplateEscape(this->sourceUnits.offset() - 2,
InvalidEscapeType::Octal);
continue;
}
if (!strictModeError(JSMSG_DEPRECATED_OCTAL)) {
return false;
}
anyChars.flags.sawOctalEscape = true;
}
if (JS7_ISOCT(unit)) {
val = 8 * val + JS7_UNOCT(unit);
consumeKnownCodeUnit(unit);
unit = peekCodeUnit();
if (MOZ_UNLIKELY(unit == EOF)) {
ReportPrematureEndOfLiteral(JSMSG_EOF_IN_ESCAPE_IN_LITERAL);
return false;
}
if (JS7_ISOCT(unit)) {
int32_t save = val;
val = 8 * val + JS7_UNOCT(unit);
if (val <= 0xFF) {
consumeKnownCodeUnit(unit);
} else {
val = save;
}
}
}
unit = char16_t(val);
break;
} }
if (!this->charBuffer.append(unit)) {
return false;
}
continue;
}
if (unit == '\r' || unit == '\n') {
if (!parsingTemplate) {
ungetCodeUnit(unit);
ReportPrematureEndOfLiteral(JSMSG_EOL_BEFORE_END_OF_STRING);
return false;
}
if (unit == '\r') {
unit = '\n';
matchLineTerminator('\n');
}
if (!updateLineInfoForEOL()) {
return false;
}
anyCharsAccess().updateFlagsForEOL();
} else if (parsingTemplate && unit == '$' && matchCodeUnit('{')) {
templateHead = true;
break;
}
if (!this->charBuffer.append(unit)) {
return false;
}
}
JSAtom* atom = drainCharBufferIntoAtom(anyCharsAccess().cx);
if (!atom) {
return false;
}
noteBadToken.release();
MOZ_ASSERT_IF(!parsingTemplate, !templateHead);
TokenKind kind = !parsingTemplate ? TokenKind::String
: templateHead ? TokenKind::TemplateHead
: TokenKind::NoSubsTemplate;
newAtomToken(kind, atom, start, modifier, out);
return true;
}
// Return the description string that FOR_EACH_TOKEN_KIND pairs with |tt|.
//
// TokenKind::Limit has no entry in that list; passing it trips a debug
// assertion and yields the "<bad TokenKind>" placeholder.
const char* TokenKindToDesc(TokenKind tt) {
  switch (tt) {
    // Expand one |case| per (name, desc) entry of the token-kind list.
#define TOKEN_KIND_DESC_CASE(name, desc) \
  case TokenKind::name:                  \
    return desc;
    FOR_EACH_TOKEN_KIND(TOKEN_KIND_DESC_CASE)
#undef TOKEN_KIND_DESC_CASE

    case TokenKind::Limit:
      MOZ_ASSERT_UNREACHABLE("TokenKind::Limit should not be passed.");
      break;
  }

  // Reached for TokenKind::Limit or any value not named in the list.
  return "<bad TokenKind>";
}
#ifdef DEBUG
// Debug-only: return the spelling of |tt| as "TokenKind::<name>", where
// <name> is the enumerator name listed in FOR_EACH_TOKEN_KIND.
//
// TokenKind::Limit (and any unlisted value) yields "<bad TokenKind>".
const char* TokenKindToString(TokenKind tt) {
  switch (tt) {
    // Expand one |case| per entry; |desc| is intentionally unused here.
#  define TOKEN_KIND_NAME_CASE(name, desc) \
    case TokenKind::name:                  \
      return "TokenKind::" #name;
    FOR_EACH_TOKEN_KIND(TOKEN_KIND_NAME_CASE)
#  undef TOKEN_KIND_NAME_CASE

    case TokenKind::Limit:
      break;
  }

  return "<bad TokenKind>";
}
#endif
// Explicit instantiation definitions of the token-stream class templates.
//
// The character-level base is instantiated for both code unit types.
template class TokenStreamCharsBase<Utf8Unit>;
template class TokenStreamCharsBase<char16_t>;
// Instantiations using TokenStreamAnyCharsAccess (char16_t only).
template class GeneralTokenStreamChars<char16_t, TokenStreamAnyCharsAccess>;
template class TokenStreamChars<char16_t, TokenStreamAnyCharsAccess>;
template class TokenStreamSpecific<char16_t, TokenStreamAnyCharsAccess>;
// Instantiations using ParserAnyCharsAccess: each of the three class templates
// below is instantiated for every combination of parse handler
// (FullParseHandler, SyntaxParseHandler) and code unit type (Utf8Unit,
// char16_t).
template class GeneralTokenStreamChars<
Utf8Unit, ParserAnyCharsAccess<GeneralParser<FullParseHandler, Utf8Unit>>>;
template class GeneralTokenStreamChars<
Utf8Unit,
ParserAnyCharsAccess<GeneralParser<SyntaxParseHandler, Utf8Unit>>>;
template class GeneralTokenStreamChars<
char16_t, ParserAnyCharsAccess<GeneralParser<FullParseHandler, char16_t>>>;
template class GeneralTokenStreamChars<
char16_t,
ParserAnyCharsAccess<GeneralParser<SyntaxParseHandler, char16_t>>>;
template class TokenStreamChars<
Utf8Unit, ParserAnyCharsAccess<GeneralParser<FullParseHandler, Utf8Unit>>>;
template class TokenStreamChars<
Utf8Unit,
ParserAnyCharsAccess<GeneralParser<SyntaxParseHandler, Utf8Unit>>>;
template class TokenStreamChars<
char16_t, ParserAnyCharsAccess<GeneralParser<FullParseHandler, char16_t>>>;
template class TokenStreamChars<
char16_t,
ParserAnyCharsAccess<GeneralParser<SyntaxParseHandler, char16_t>>>;
template class TokenStreamSpecific<
Utf8Unit, ParserAnyCharsAccess<GeneralParser<FullParseHandler, Utf8Unit>>>;
template class TokenStreamSpecific<
Utf8Unit,
ParserAnyCharsAccess<GeneralParser<SyntaxParseHandler, Utf8Unit>>>;
template class TokenStreamSpecific<
char16_t, ParserAnyCharsAccess<GeneralParser<FullParseHandler, char16_t>>>;
template class TokenStreamSpecific<
char16_t,
ParserAnyCharsAccess<GeneralParser<SyntaxParseHandler, char16_t>>>;
}
}
// Read one line from |file| into |buf|, storing at most |size - 1| characters
// followed by a terminating NUL.
//
// A line ends at '\n' (which is kept in |buf|), at a '\r' that is not
// immediately followed by '\n' (the '\r' is kept and the following character
// is pushed back onto |file|), at EOF, or when the buffer is full.
//
// Returns the number of characters stored before the NUL, or -1 if |size| is
// not positive (in which case |buf| is left untouched).
JS_FRIEND_API int js_fgets(char* buf, int size, FILE* file) {
  // Reserve one slot for the terminating NUL.
  const int capacity = size - 1;
  if (capacity < 0) {
    return -1;
  }

  int len = 0;
  bool prevWasCR = false;
  while (len < capacity) {
    const int ch = fast_getc(file);
    if (ch == EOF) {
      break;
    }

    buf[len] = ch;

    if (ch == '\n') {
      // Any '\n' ends the line.  Keep it: |len < capacity| guarantees there
      // is still room for the NUL.
      ++len;
      break;
    }

    if (prevWasCR) {
      // A '\r' not followed by '\n' ends the line at the '\r'.  Hand |ch|
      // back to the stream; its slot in |buf| is overwritten by the NUL.
      ungetc(ch, file);
      break;
    }

    prevWasCR = (ch == '\r');
    ++len;
  }

  buf[len] = '\0';
  return len;
}