#include "re2.h"
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#ifdef _MSC_VER
#include <intrin.h>
#endif
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <atomic>
#include <iterator>
#include <mutex>
#include <string>
#include <utility>
#include <vector>
#include "logging.h"
#include "prog.h"
#include "regexp.h"
#include "sparse_array.h"
#include "strutil.h"
#include "utf.h"
#include "util.h"
namespace lbug {
namespace regex {
// Cap on the number of replacements performed by GlobalReplace().
// The value -1 disables the cap (GlobalReplace() only applies it when != -1).
static int maximum_global_replace_count = -1;
// Fuzzing-only hook: overwrites the GlobalReplace() replacement cap with i.
void RE2::FUZZING_ONLY_set_maximum_global_replace_count(int i) {
maximum_global_replace_count = i;
}
// Number of StringPiece slots, beyond slot 0, in the fixed-size arrays used
// by Replace(), GlobalReplace(), Extract() and DoMatch().
static const int kMaxArgs = 16;
// Size of those arrays: slot 0 (the overall match) plus kMaxArgs more.
static const int kVecSize = 1 + kMaxArgs;
// Out-of-class definition of the static member constant.
const int RE2::Options::kDefaultMaxMem;
// Builds an Options object from a canned preset:
//  - RE2::Latin1 selects EncodingLatin1; every other preset uses EncodingUTF8.
//  - RE2::POSIX turns on both posix_syntax_ and longest_match_.
//  - RE2::Quiet turns off log_errors_.
// max_mem_ starts at kDefaultMaxMem, case_sensitive_ starts true and all the
// remaining boolean flags start false.
RE2::Options::Options(RE2::CannedOptions opt)
: max_mem_(kDefaultMaxMem), encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8),
posix_syntax_(opt == RE2::POSIX), longest_match_(opt == RE2::POSIX),
log_errors_(opt != RE2::Quiet), literal_(false), never_nl_(false), dot_nl_(false),
never_capture_(false), case_sensitive_(true), perl_classes_(false), word_boundary_(false),
one_line_(false) {}
// Bundle of the shared "empty" objects that RE2 instances point at instead of
// allocating their own empty string / maps.
struct EmptyStorage {
std::string empty_string;
std::map<std::string, int> empty_named_groups;
std::map<int, std::string> empty_group_names;
};
// Raw, suitably aligned bytes that hold the single EmptyStorage instance.
// The object is constructed in place by RE2::Init() (placement new under a
// std::once_flag); no destructor call for it appears in this file.
alignas(EmptyStorage) static char empty_storage[sizeof(EmptyStorage)];
static inline std::string* empty_string() {
return &reinterpret_cast<EmptyStorage*>(empty_storage)->empty_string;
}
static inline std::map<std::string, int>* empty_named_groups() {
return &reinterpret_cast<EmptyStorage*>(empty_storage)->empty_named_groups;
}
static inline std::map<int, std::string>* empty_group_names() {
return &reinterpret_cast<EmptyStorage*>(empty_storage)->empty_group_names;
}
// Translates a parser status code into the matching public RE2::ErrorCode.
// Any code without an explicit case is reported as RE2::ErrorInternal.
static RE2::ErrorCode RegexpErrorToRE2(RegexpStatusCode code) {
  RE2::ErrorCode mapped = RE2::ErrorInternal;
  switch (code) {
    case kRegexpSuccess:
      mapped = RE2::NoError;
      break;
    case kRegexpInternalError:
      mapped = RE2::ErrorInternal;
      break;
    case kRegexpBadEscape:
      mapped = RE2::ErrorBadEscape;
      break;
    case kRegexpBadCharClass:
      mapped = RE2::ErrorBadCharClass;
      break;
    case kRegexpBadCharRange:
      mapped = RE2::ErrorBadCharRange;
      break;
    case kRegexpMissingBracket:
      mapped = RE2::ErrorMissingBracket;
      break;
    case kRegexpMissingParen:
      mapped = RE2::ErrorMissingParen;
      break;
    case kRegexpUnexpectedParen:
      mapped = RE2::ErrorUnexpectedParen;
      break;
    case kRegexpTrailingBackslash:
      mapped = RE2::ErrorTrailingBackslash;
      break;
    case kRegexpRepeatArgument:
      mapped = RE2::ErrorRepeatArgument;
      break;
    case kRegexpRepeatSize:
      mapped = RE2::ErrorRepeatSize;
      break;
    case kRegexpRepeatOp:
      mapped = RE2::ErrorRepeatOp;
      break;
    case kRegexpBadPerlOp:
      mapped = RE2::ErrorBadPerlOp;
      break;
    case kRegexpBadUTF8:
      mapped = RE2::ErrorBadUTF8;
      break;
    case kRegexpBadNamedCapture:
      mapped = RE2::ErrorBadNamedCapture;
      break;
  }
  return mapped;
}
// Produces a log-friendly copy of pattern: patterns shorter than 100 bytes are
// returned whole, anything else is cut to its first 100 bytes followed by
// "..." (note that a pattern of exactly 100 bytes also gets the "...").
static std::string trunc(const StringPiece& pattern) {
  const bool fits = pattern.size() < 100;
  std::string shown = fits ? std::string(pattern) : std::string(pattern.substr(0, 100));
  if (!fits) {
    shown += "...";
  }
  return shown;
}
// Compiles pattern (a C string) with DefaultOptions; all members are set up
// by Init().
RE2::RE2(const char* pattern) {
Init(pattern, DefaultOptions);
}
// Compiles pattern (a std::string) with DefaultOptions; all members are set
// up by Init().
RE2::RE2(const std::string& pattern) {
Init(pattern, DefaultOptions);
}
// Compiles pattern (a StringPiece) with DefaultOptions; all members are set
// up by Init().
RE2::RE2(const StringPiece& pattern) {
Init(pattern, DefaultOptions);
}
// Compiles pattern with caller-supplied options; all members are set up by
// Init().
RE2::RE2(const StringPiece& pattern, const Options& options) {
Init(pattern, options);
}
int RE2::Options::ParseFlags() const {
int flags = Regexp::ClassNL;
switch (encoding()) {
default:
if (log_errors())
LOG(ERROR) << "Unknown encoding " << encoding();
break;
case RE2::Options::EncodingUTF8:
break;
case RE2::Options::EncodingLatin1:
flags |= Regexp::Latin1;
break;
}
if (!posix_syntax())
flags |= Regexp::LikePerl;
if (literal())
flags |= Regexp::Literal;
if (never_nl())
flags |= Regexp::NeverNL;
if (dot_nl())
flags |= Regexp::DotNL;
if (never_capture())
flags |= Regexp::NeverCapture;
if (!case_sensitive())
flags |= Regexp::FoldCase;
if (perl_classes())
flags |= Regexp::PerlClasses;
if (word_boundary())
flags |= Regexp::PerlB;
if (one_line())
flags |= Regexp::OneLine;
return flags;
}
// Shared constructor body: stores a copy of the pattern and options, resets
// every member to its "nothing compiled" state, then parses and compiles the
// pattern. On failure the error members are filled in and prog_ stays NULL.
void RE2::Init(const StringPiece& pattern, const Options& options) {
// Construct the shared EmptyStorage object exactly once, in place, before any
// member is pointed at it.
static std::once_flag empty_once;
std::call_once(empty_once, []() { (void)new (empty_storage) EmptyStorage; });
pattern_ = new std::string(pattern);
options_.Copy(options);
entire_regexp_ = NULL;
suffix_regexp_ = NULL;
// error_ / error_arg_ start out aliasing the shared empty string; the
// destructor only deletes them when they point elsewhere.
error_ = empty_string();
error_arg_ = empty_string();
num_captures_ = -1;
error_code_ = NoError;
longest_match_ = options_.longest_match();
is_one_pass_ = false;
prefix_foldcase_ = false;
prefix_.clear();
prog_ = NULL;
rprog_ = NULL;
// The two maps are filled in on first use by NamedCapturingGroups() and
// CapturingGroupNames().
named_groups_ = NULL;
group_names_ = NULL;
RegexpStatus status;
entire_regexp_ =
Regexp::Parse(*pattern_, static_cast<Regexp::ParseFlags>(options_.ParseFlags()), &status);
if (entire_regexp_ == NULL) {
// Parse failure: record heap-allocated copies of the error text and argument.
if (options_.log_errors()) {
LOG(ERROR) << "Error parsing '" << trunc(*pattern_) << "': " << status.Text();
}
error_ = new std::string(status.Text());
error_code_ = RegexpErrorToRE2(status.code());
error_arg_ = new std::string(status.error_arg());
return;
}
// If RequiredPrefix() reports a literal prefix, keep it in prefix_ and compile
// only the remaining suffix; otherwise the suffix is the entire regexp (with an
// extra reference, since the destructor Decref()s both pointers).
bool foldcase;
Regexp* suffix;
if (entire_regexp_->RequiredPrefix(&prefix_, &foldcase, &suffix)) {
prefix_foldcase_ = foldcase;
suffix_regexp_ = suffix;
} else {
suffix_regexp_ = entire_regexp_->Incref();
}
// The forward program is given two thirds of max_mem(); ReverseProg() passes
// the remaining third to the reverse compile.
prog_ = suffix_regexp_->CompileToProg(options_.max_mem() * 2 / 3);
if (prog_ == NULL) {
if (options_.log_errors())
LOG(ERROR) << "Error compiling '" << trunc(*pattern_) << "'";
error_ = new std::string("pattern too large - compile failed");
error_code_ = RE2::ErrorPatternTooLarge;
return;
}
num_captures_ = suffix_regexp_->NumCaptures();
is_one_pass_ = prog_->IsOnePass();
}
// Returns the reverse program, compiling it on the first call (guarded by
// rprog_once_) with one third of max_mem(). A failed compile is logged when
// log_errors() is on and leaves rprog_ NULL, which is what callers receive.
Prog* RE2::ReverseProg() const {
  auto compile_reverse = [](const RE2* self) {
    self->rprog_ = self->suffix_regexp_->CompileToReverseProg(self->options_.max_mem() / 3);
    const bool failed = (self->rprog_ == NULL);
    if (failed && self->options_.log_errors()) {
      LOG(ERROR) << "Error reverse compiling '" << trunc(*self->pattern_) << "'";
    }
  };
  std::call_once(rprog_once_, compile_reverse, this);
  return rprog_;
}
// Releases everything Init() and the lazy accessors allocated. Pointers that
// alias the shared empty singletons are skipped because this object does not
// own them.
RE2::~RE2() {
  if (group_names_ != empty_group_names()) {
    delete group_names_;
  }
  if (named_groups_ != empty_named_groups()) {
    delete named_groups_;
  }

  delete rprog_;
  delete prog_;

  if (error_arg_ != empty_string()) {
    delete error_arg_;
  }
  if (error_ != empty_string()) {
    delete error_;
  }

  // The Regexp objects are reference counted rather than deleted directly.
  if (suffix_regexp_ != NULL) {
    suffix_regexp_->Decref();
  }
  if (entire_regexp_ != NULL) {
    entire_regexp_->Decref();
  }

  delete pattern_;
}
// Size of the compiled forward program, or -1 when nothing was compiled.
int RE2::ProgramSize() const {
  return (prog_ != NULL) ? prog_->size() : -1;
}
int RE2::ReverseProgramSize() const {
if (prog_ == NULL)
return -1;
Prog* prog = ReverseProg();
if (prog == NULL)
return -1;
return prog->size();
}
// Returns the zero-based index of the most significant set bit of n.
// n must be non-zero (checked with DCHECK_NE only).
static int FindMSBSet(uint32_t n) {
  DCHECK_NE(n, 0);
#if defined(__GNUC__)
  return 31 ^ __builtin_clz(n);
#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
  unsigned long index;
  _BitScanReverse(&index, n);
  return static_cast<int>(index);
#else
  // Portable fallback: binary search over shift widths 16, 8, 4, 2, 1.
  int index = 0;
  for (int shift = 16; shift > 0; shift /= 2) {
    const uint32_t upper = n >> shift;
    if (upper == 0) {
      continue;
    }
    n = upper;
    index += shift;
  }
  return index;
#endif
}
// Buckets the non-zero fanout values reported by prog->Fanout() by
// ceil(log2(value)). If histogram is non-NULL it receives the bucket counts up
// to the highest non-empty bucket. Returns the index of that bucket, or -1
// when every value was zero.
static int Fanout(Prog* prog, std::vector<int>* histogram) {
  SparseArray<int> fanout(prog->size());
  prog->Fanout(&fanout);

  int counts[32] = {};
  int used = 0;
  for (auto it = fanout.begin(); it != fanout.end(); ++it) {
    const uint32_t value = it->value();
    if (value == 0) {
      continue;
    }
    int bucket = FindMSBSet(value);
    // Values that are not an exact power of two round up to the next bucket.
    if ((value & (value - 1)) != 0) {
      ++bucket;
    }
    ++counts[bucket];
    used = std::max(used, bucket + 1);
  }

  if (histogram != NULL) {
    histogram->assign(counts, counts + used);
  }
  return used - 1;
}
// Fanout histogram of the forward program; returns -1 when nothing was
// compiled (histogram is left untouched in that case).
int RE2::ProgramFanout(std::vector<int>* histogram) const {
  return (prog_ != NULL) ? Fanout(prog_, histogram) : -1;
}
int RE2::ReverseProgramFanout(std::vector<int>* histogram) const {
if (prog_ == NULL)
return -1;
Prog* prog = ReverseProg();
if (prog == NULL)
return -1;
return Fanout(prog, histogram);
}
const std::map<std::string, int>& RE2::NamedCapturingGroups() const {
std::call_once(
named_groups_once_,
[](const RE2* re) {
if (re->suffix_regexp_ != NULL)
re->named_groups_ = re->suffix_regexp_->NamedCaptures();
if (re->named_groups_ == NULL)
re->named_groups_ = empty_named_groups();
},
this);
return *named_groups_;
}
const std::map<int, std::string>& RE2::CapturingGroupNames() const {
std::call_once(
group_names_once_,
[](const RE2* re) {
if (re->suffix_regexp_ != NULL)
re->group_names_ = re->suffix_regexp_->CaptureNames();
if (re->group_names_ == NULL)
re->group_names_ = empty_group_names();
},
this);
return *group_names_;
}
// Matches re against all of text (ANCHOR_BOTH) and, on success, parses the
// first n submatches into args. Forwards to DoMatch() with no "consumed" output.
bool RE2::FullMatchN(const StringPiece& text, const RE2& re, const Arg* const args[], int n) {
return re.DoMatch(text, ANCHOR_BOTH, NULL, args, n);
}
// Searches text for re without anchoring (UNANCHORED) and, on success, parses
// the first n submatches into args. Forwards to DoMatch() with no "consumed"
// output.
bool RE2::PartialMatchN(const StringPiece& text, const RE2& re, const Arg* const args[], int n) {
return re.DoMatch(text, UNANCHORED, NULL, args, n);
}
// Matches re at the start of *input (ANCHOR_START). On success the bytes up to
// the end of the match are removed from the front of *input and true is
// returned; on failure *input is left unchanged.
bool RE2::ConsumeN(StringPiece* input, const RE2& re, const Arg* const args[], int n) {
  size_t consumed;
  const bool matched = re.DoMatch(*input, ANCHOR_START, &consumed, args, n);
  if (matched) {
    input->remove_prefix(consumed);
  }
  return matched;
}
// Searches *input for re anywhere (UNANCHORED). On success everything up to
// the end of the match is removed from the front of *input and true is
// returned; on failure *input is left unchanged.
bool RE2::FindAndConsumeN(StringPiece* input, const RE2& re, const Arg* const args[], int n) {
  size_t consumed;
  const bool matched = re.DoMatch(*input, UNANCHORED, &consumed, args, n);
  if (matched) {
    input->remove_prefix(consumed);
  }
  return matched;
}
bool RE2::Replace(std::string* str, const RE2& re, const StringPiece& rewrite) {
StringPiece vec[kVecSize];
int nvec = 1 + MaxSubmatch(rewrite);
if (nvec > 1 + re.NumberOfCapturingGroups())
return false;
if (nvec > static_cast<int>(arraysize(vec)))
return false;
if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec))
return false;
std::string s;
if (!re.Rewrite(&s, rewrite, vec, nvec))
return false;
assert(vec[0].data() >= str->data());
assert(vec[0].data() + vec[0].size() <= str->data() + str->size());
str->replace(vec[0].data() - str->data(), vec[0].size(), s);
return true;
}
// Replaces successive matches of re in *str with the expansion of rewrite and
// returns the number of replacements made. When the count is 0, *str is left
// untouched. Returns false (i.e. 0) when rewrite references more groups than
// re has or than fit in the local array.
int RE2::GlobalReplace(std::string* str, const RE2& re, const StringPiece& rewrite) {
StringPiece vec[kVecSize];
int nvec = 1 + MaxSubmatch(rewrite);
if (nvec > 1 + re.NumberOfCapturingGroups())
return false;
if (nvec > static_cast<int>(arraysize(vec)))
return false;
// p is the current scan position, ep the end of the input and lastend the end
// of the previous replaced match; the result is accumulated in out.
const char* p = str->data();
const char* ep = p + str->size();
const char* lastend = NULL;
std::string out;
int count = 0;
while (p <= ep) {
// Optional cap on the number of replacements (set through the fuzzing hook).
if (maximum_global_replace_count != -1 && count >= maximum_global_replace_count)
break;
if (!re.Match(
*str, static_cast<size_t>(p - str->data()), str->size(), UNANCHORED, vec, nvec))
break;
// Copy the unmatched text that precedes this match.
if (p < vec[0].data())
out.append(p, vec[0].data() - p);
// An empty match located exactly where the previous match ended is not
// replaced; instead one character is copied through and the scan advances.
if (vec[0].data() == lastend && vec[0].empty()) {
// In UTF-8 mode, try to step over a whole encoded character. fullrune() is
// given at most 4 bytes of the remaining input.
if (re.options().encoding() == RE2::Options::EncodingUTF8 &&
fullrune(p, static_cast<int>(std::min(ptrdiff_t{4}, ep - p)))) {
Rune r;
int n = chartorune(&r, p);
// Values above Runemax are treated the same as a decoding error.
if (r > Runemax) {
n = 1;
r = Runeerror;
}
// Only take the multi-byte step when decoding succeeded.
if (!(n == 1 && r == Runeerror)) { out.append(p, n);
p += n;
continue;
}
}
// Otherwise copy a single byte (if any is left) and move on by one.
if (p < ep)
out.append(p, 1);
p++;
continue;
}
// Regular match: append its expansion (Rewrite()'s result is not checked
// here) and continue after the end of the match.
re.Rewrite(&out, rewrite, vec, nvec);
p = vec[0].data() + vec[0].size();
lastend = p;
count++;
}
if (count == 0)
return 0;
// Copy whatever follows the last match, then publish the result.
if (p < ep)
out.append(p, ep - p);
using std::swap;
swap(out, *str);
return count;
}
// Searches text for the first match of re and writes the expansion of rewrite
// for that match into *out (which is cleared first). Returns false, without
// touching *out, when rewrite references too many groups or nothing matches;
// otherwise returns the result of the rewrite expansion.
bool RE2::Extract(
    const StringPiece& text, const RE2& re, const StringPiece& rewrite, std::string* out) {
  StringPiece vec[kVecSize];
  const int nvec = 1 + MaxSubmatch(rewrite);
  const bool exceeds_groups = nvec > 1 + re.NumberOfCapturingGroups();
  if (exceeds_groups || nvec > static_cast<int>(arraysize(vec))) {
    return false;
  }
  const bool found = re.Match(text, 0, text.size(), UNANCHORED, vec, nvec);
  if (!found) {
    return false;
  }
  out->clear();
  return re.Rewrite(out, rewrite, vec, nvec);
}
// Returns a copy of unquoted in which every byte other than [A-Za-z0-9_] and
// bytes with the high bit set is preceded by a backslash. NUL bytes are
// written as the four characters \x00.
std::string RE2::QuoteMeta(const StringPiece& unquoted) {
  std::string quoted;
  quoted.reserve(unquoted.size() * 2);
  for (size_t pos = 0; pos < unquoted.size(); ++pos) {
    const char ch = unquoted[pos];
    const bool is_lower = ch >= 'a' && ch <= 'z';
    const bool is_upper = ch >= 'A' && ch <= 'Z';
    const bool is_digit = ch >= '0' && ch <= '9';
    const bool high_bit = (ch & 128) != 0;
    if (is_lower || is_upper || is_digit || ch == '_' || high_bit) {
      // Safe as-is: copy through unescaped.
      quoted += ch;
      continue;
    }
    if (ch == '\0') {
      quoted += "\\x00";
      continue;
    }
    quoted += '\\';
    quoted += ch;
  }
  return quoted;
}
bool RE2::PossibleMatchRange(std::string* min, std::string* max, int maxlen) const {
if (prog_ == NULL)
return false;
int n = static_cast<int>(prefix_.size());
if (n > maxlen)
n = maxlen;
*min = prefix_.substr(0, n);
*max = prefix_.substr(0, n);
if (prefix_foldcase_) {
for (int i = 0; i < n; i++) {
char& c = (*min)[i];
if ('a' <= c && c <= 'z')
c += 'A' - 'a';
}
}
std::string dmin, dmax;
maxlen -= n;
if (maxlen > 0 && prog_->PossibleMatchRange(&dmin, &dmax, maxlen)) {
min->append(dmin);
max->append(dmax);
} else if (!max->empty()) {
PrefixSuccessor(max);
} else {
*min = "";
*max = "";
return false;
}
return true;
}
// Compares the first len bytes of a and b, lower-casing ASCII upper-case
// letters of b only (a is compared as-is). Returns 0 when they are equal,
// otherwise the difference between the first pair of differing bytes
// (a's byte minus b's folded byte, both taken as unsigned).
static int ascii_strcasecmp(const char* a, const char* b, size_t len) {
  for (size_t i = 0; i < len; ++i) {
    const uint8_t lhs = static_cast<uint8_t>(a[i]);
    uint8_t rhs = static_cast<uint8_t>(b[i]);
    if (rhs >= 'A' && rhs <= 'Z') {
      rhs = static_cast<uint8_t>(rhs + ('a' - 'A'));
    }
    if (lhs != rhs) {
      return static_cast<int>(lhs) - static_cast<int>(rhs);
    }
  }
  return 0;
}
// General matching entry point. Searches text[startpos, endpos) for this
// regexp with the requested anchoring. On success returns true and fills
// submatch[0 .. nsubmatch): slot 0 is the overall match, the following slots
// are the capturing groups, and slots beyond the regexp's group count are
// reset to empty StringPieces. Returns false when this RE2 is not ok(), when
// the position pair is inconsistent, or when there is no match.
bool RE2::Match(const StringPiece& text, size_t startpos, size_t endpos, Anchor re_anchor,
StringPiece* submatch, int nsubmatch) const {
if (!ok()) {
if (options_.log_errors())
LOG(ERROR) << "Invalid RE2: " << *error_;
return false;
}
if (startpos > endpos || endpos > text.size()) {
if (options_.log_errors())
LOG(ERROR) << "RE2: invalid startpos, endpos pair. ["
<< "startpos: " << startpos << ", "
<< "endpos: " << endpos << ", "
<< "text size: " << text.size() << "]";
return false;
}
// subtext is the [startpos, endpos) window of text.
StringPiece subtext = text;
subtext.remove_prefix(startpos);
subtext.remove_suffix(text.size() - endpos);
// When the caller asked for no submatches at all, SearchDFA is handed a NULL
// match pointer (see the UNANCHORED case below).
StringPiece match;
StringPiece* matchp = &match;
if (nsubmatch == 0)
matchp = NULL;
// ncap: number of submatch slots to actually compute, capped at nsubmatch.
int ncap = 1 + NumberOfCapturingGroups();
if (ncap > nsubmatch)
ncap = nsubmatch;
// A program anchored at the start/end cannot match a window that does not
// begin/end where text does.
if (prog_->anchor_start() && startpos != 0)
return false;
if (prog_->anchor_end() && endpos != text.size())
return false;
// Tighten re_anchor to reflect the program's own anchoring.
if (prog_->anchor_start() && prog_->anchor_end())
re_anchor = ANCHOR_BOTH;
else if (prog_->anchor_start() && re_anchor != ANCHOR_BOTH)
re_anchor = ANCHOR_START;
// prefix_ is the literal prefix split off in Init(); prog_ was compiled from
// the remaining suffix only. Compare the prefix by hand and strip it from
// subtext; the rest must then match anchored at the start.
size_t prefixlen = 0;
if (!prefix_.empty()) {
if (startpos != 0)
return false;
prefixlen = prefix_.size();
if (prefixlen > subtext.size())
return false;
if (prefix_foldcase_) {
if (ascii_strcasecmp(&prefix_[0], subtext.data(), prefixlen) != 0)
return false;
} else {
if (memcmp(&prefix_[0], subtext.data(), prefixlen) != 0)
return false;
}
subtext.remove_prefix(prefixlen);
if (re_anchor != ANCHOR_BOTH)
re_anchor = ANCHOR_START;
}
Prog::Anchor anchor = Prog::kUnanchored;
Prog::MatchKind kind = longest_match_ ? Prog::kLongestMatch : Prog::kFirstMatch;
// Which submatch engines are usable for this program / capture count.
bool can_one_pass = is_one_pass_ && ncap <= Prog::kMaxOnePassCapture;
bool can_bit_state = prog_->CanBitState();
size_t bit_state_text_max_size = prog_->bit_state_text_max_size();
#ifdef RE2_HAVE_THREAD_LOCAL
hooks::context = this;
#endif
// dfa_failed is an output of SearchDFA. skipped_test records that the DFA
// step did not produce a match location (it was skipped, unavailable, or
// failed), so the engine chosen after the switch must search subtext itself.
bool dfa_failed = false;
bool skipped_test = false;
switch (re_anchor) {
default:
LOG(DFATAL) << "Unexpected re_anchor value: " << re_anchor;
return false;
case UNANCHORED: {
if (prog_->anchor_end()) {
// The program is anchored at the end: run only the reverse program,
// anchored, over subtext.
Prog* prog = ReverseProg();
if (prog == NULL) {
skipped_test = true;
break;
}
if (!prog->SearchDFA(subtext, text, Prog::kAnchored, Prog::kLongestMatch, matchp,
&dfa_failed, NULL)) {
if (dfa_failed) {
if (options_.log_errors())
LOG(ERROR) << "DFA out of memory: "
<< "pattern length " << pattern_->size() << ", "
<< "program size " << prog->size() << ", "
<< "list count " << prog->list_count() << ", "
<< "bytemap range " << prog->bytemap_range();
skipped_test = true;
break;
}
return false;
}
// Matched and the caller does not want the location.
if (matchp == NULL) return true;
break;
}
// Forward DFA search over subtext.
if (!prog_->SearchDFA(subtext, text, anchor, kind, matchp, &dfa_failed, NULL)) {
if (dfa_failed) {
if (options_.log_errors())
LOG(ERROR) << "DFA out of memory: "
<< "pattern length " << pattern_->size() << ", "
<< "program size " << prog_->size() << ", "
<< "list count " << prog_->list_count() << ", "
<< "bytemap range " << prog_->bytemap_range();
skipped_test = true;
break;
}
return false;
}
// Matched and the caller does not want the location.
if (matchp == NULL) return true;
// Second pass: run the reverse program, anchored, over the span the forward
// search reported, writing the refined span back into match.
Prog* prog = ReverseProg();
if (prog == NULL) {
skipped_test = true;
break;
}
if (!prog->SearchDFA(
match, text, Prog::kAnchored, Prog::kLongestMatch, &match, &dfa_failed, NULL)) {
if (dfa_failed) {
if (options_.log_errors())
LOG(ERROR) << "DFA out of memory: "
<< "pattern length " << pattern_->size() << ", "
<< "program size " << prog->size() << ", "
<< "list count " << prog->list_count() << ", "
<< "bytemap range " << prog->bytemap_range();
skipped_test = true;
break;
}
// The forward search matched but the reverse search did not.
if (options_.log_errors())
LOG(ERROR) << "SearchDFA inconsistency";
return false;
}
break;
}
case ANCHOR_BOTH:
case ANCHOR_START:
if (re_anchor == ANCHOR_BOTH)
kind = Prog::kFullMatch;
anchor = Prog::kAnchored;
// Skip the DFA entirely when OnePass or BitState will be run on a small
// enough text anyway.
if (can_one_pass && text.size() <= 4096 && (ncap > 1 || text.size() <= 16)) {
skipped_test = true;
break;
}
if (can_bit_state && text.size() <= bit_state_text_max_size && ncap > 1) {
skipped_test = true;
break;
}
if (!prog_->SearchDFA(subtext, text, anchor, kind, &match, &dfa_failed, NULL)) {
if (dfa_failed) {
if (options_.log_errors())
LOG(ERROR) << "DFA out of memory: "
<< "pattern length " << pattern_->size() << ", "
<< "program size " << prog_->size() << ", "
<< "list count " << prog_->list_count() << ", "
<< "bytemap range " << prog_->bytemap_range();
skipped_test = true;
break;
}
return false;
}
break;
}
if (!skipped_test && ncap <= 1) {
// The DFA result is all that was asked for.
if (ncap == 1)
submatch[0] = match;
} else {
// Submatches are needed (or the DFA gave no location): run OnePass, BitState
// or the NFA. If the DFA located the match, search exactly that span as an
// anchored full match; otherwise search all of subtext with the current
// anchor and kind.
StringPiece subtext1;
if (skipped_test) {
subtext1 = subtext;
} else {
subtext1 = match;
anchor = Prog::kAnchored;
kind = Prog::kFullMatch;
}
// A failure here is only logged as an inconsistency when the DFA had already
// reported a match (i.e. !skipped_test).
if (can_one_pass && anchor != Prog::kUnanchored) {
if (!prog_->SearchOnePass(subtext1, text, anchor, kind, submatch, ncap)) {
if (!skipped_test && options_.log_errors())
LOG(ERROR) << "SearchOnePass inconsistency";
return false;
}
} else if (can_bit_state && subtext1.size() <= bit_state_text_max_size) {
if (!prog_->SearchBitState(subtext1, text, anchor, kind, submatch, ncap)) {
if (!skipped_test && options_.log_errors())
LOG(ERROR) << "SearchBitState inconsistency";
return false;
}
} else {
if (!prog_->SearchNFA(subtext1, text, anchor, kind, submatch, ncap)) {
if (!skipped_test && options_.log_errors())
LOG(ERROR) << "SearchNFA inconsistency";
return false;
}
}
}
// Extend the overall match backwards over the literal prefix stripped above.
if (prefixlen > 0 && nsubmatch > 0)
submatch[0] = StringPiece(submatch[0].data() - prefixlen, submatch[0].size() + prefixlen);
// Clear the slots the caller asked for that were not computed.
for (int i = ncap; i < nsubmatch; i++)
submatch[i] = StringPiece();
return true;
}
bool RE2::DoMatch(const StringPiece& text, Anchor re_anchor, size_t* consumed,
const Arg* const* args, int n) const {
if (!ok()) {
if (options_.log_errors())
LOG(ERROR) << "Invalid RE2: " << *error_;
return false;
}
if (NumberOfCapturingGroups() < n) {
return false;
}
int nvec;
if (n == 0 && consumed == NULL)
nvec = 0;
else
nvec = n + 1;
StringPiece* vec;
StringPiece stkvec[kVecSize];
StringPiece* heapvec = NULL;
if (nvec <= static_cast<int>(arraysize(stkvec))) {
vec = stkvec;
} else {
vec = new StringPiece[nvec];
heapvec = vec;
}
if (!Match(text, 0, text.size(), re_anchor, vec, nvec)) {
delete[] heapvec;
return false;
}
if (consumed != NULL)
*consumed = static_cast<size_t>(EndPtr(vec[0]) - BeginPtr(text));
if (n == 0 || args == NULL) {
delete[] heapvec;
return true;
}
for (int i = 0; i < n; i++) {
const StringPiece& s = vec[i + 1];
if (!args[i]->Parse(s.data(), s.size())) {
delete[] heapvec;
return false;
}
}
delete[] heapvec;
return true;
}
// Validates a rewrite string against this regexp.
//
// A backslash must be followed by another backslash or by a single decimal
// digit, and the largest digit referenced must not exceed the number of
// capturing groups. Returns true when the string is valid; otherwise returns
// false and stores a description of the problem in *error.
//
// The digit test is an explicit '0'..'9' range check rather than isdigit():
// passing a negative char value (any byte >= 0x80 where char is signed) to
// isdigit() is undefined behaviour.
bool RE2::CheckRewriteString(const StringPiece& rewrite, std::string* error) const {
  int max_token = -1;
  for (const char *s = rewrite.data(), *end = s + rewrite.size(); s < end; s++) {
    if (*s != '\\') {
      continue;
    }
    if (++s == end) {
      *error = "Rewrite schema error: '\\' not allowed at end.";
      return false;
    }
    const char c = *s;
    if (c == '\\') {
      continue;
    }
    if (c < '0' || c > '9') {
      *error = "Rewrite schema error: "
               "'\\' must be followed by a digit or '\\'.";
      return false;
    }
    const int n = c - '0';
    if (max_token < n) {
      max_token = n;
    }
  }
  if (max_token > NumberOfCapturingGroups()) {
    *error = StringPrintf("Rewrite schema requests %d matches, but the regexp only has %d "
                          "parenthesized subexpressions.",
        max_token, NumberOfCapturingGroups());
    return false;
  }
  return true;
}
// Returns the largest group number referenced as \N (N a single decimal
// digit) in rewrite, or 0 when there is no such reference.
//
// The digit test is an explicit '0'..'9' range check rather than isdigit(),
// which has undefined behaviour for negative char values. A trailing
// backslash ends the scan instead of stepping the pointer past end.
int RE2::MaxSubmatch(const StringPiece& rewrite) {
  int max = 0;
  for (const char *s = rewrite.data(), *end = s + rewrite.size(); s < end; s++) {
    if (*s != '\\') {
      continue;
    }
    if (++s == end) {
      break;  // lone backslash at the end: nothing follows it
    }
    const char c = *s;
    if (c >= '0' && c <= '9') {
      const int n = c - '0';
      if (n > max)
        max = n;
    }
  }
  return max;
}
bool RE2::Rewrite(
std::string* out, const StringPiece& rewrite, const StringPiece* vec, int veclen) const {
for (const char *s = rewrite.data(), *end = s + rewrite.size(); s < end; s++) {
if (*s != '\\') {
out->push_back(*s);
continue;
}
s++;
int c = (s < end) ? *s : -1;
if (isdigit(c)) {
int n = (c - '0');
if (n >= veclen) {
if (options_.log_errors()) {
LOG(ERROR) << "invalid substitution \\" << n << " from " << veclen << " groups";
}
return false;
}
StringPiece snip = vec[n];
if (!snip.empty())
out->append(snip.data(), snip.size());
} else if (c == '\\') {
out->push_back('\\');
} else {
if (options_.log_errors())
LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data();
return false;
}
}
return true;
}
namespace re2_internal {
// Parse specialization for a void destination: succeeds only when dest is
// NULL; str and n are not examined.
template<>
bool Parse(const char* str, size_t n, void* dest) {
return (dest == NULL);
}
// Parse specialization for std::string: always succeeds; when dest is
// non-NULL it receives a copy of the n bytes starting at str.
template<>
bool Parse(const char* str, size_t n, std::string* dest) {
  if (dest != NULL) {
    dest->assign(str, n);
  }
  return true;
}
// Parse specialization for StringPiece: always succeeds; when dest is
// non-NULL it is set to refer to the n bytes starting at str (no copy is made).
template<>
bool Parse(const char* str, size_t n, StringPiece* dest) {
  if (dest != NULL) {
    *dest = StringPiece(str, n);
  }
  return true;
}
// Parse specialization for char: succeeds only for input of exactly one byte,
// which is stored in *dest when dest is non-NULL.
template<>
bool Parse(const char* str, size_t n, char* dest) {
  if (n != 1) {
    return false;
  }
  if (dest != NULL) {
    *dest = str[0];
  }
  return true;
}
// Parse specialization for signed char: succeeds only for input of exactly
// one byte, which is stored in *dest when dest is non-NULL.
template<>
bool Parse(const char* str, size_t n, signed char* dest) {
  if (n != 1) {
    return false;
  }
  if (dest != NULL) {
    *dest = str[0];
  }
  return true;
}
// Parse specialization for unsigned char: succeeds only for input of exactly
// one byte, which is stored in *dest when dest is non-NULL.
template<>
bool Parse(const char* str, size_t n, unsigned char* dest) {
  if (n != 1) {
    return false;
  }
  if (dest != NULL) {
    *dest = str[0];
  }
  return true;
}
// Size (excluding the terminating NUL) of the scratch buffers used by the
// integer Parse specializations.
static const int kMaxNumberLength = 32;

// Copies the number in str[0, *np) into buf as a NUL-terminated string so it
// can be handed to the strtoxxx() family, and returns buf.
//
//  - Leading whitespace is skipped when accept_spaces is true and rejected
//    otherwise.
//  - Runs of three or more leading zeros (after an optional '-') are reduced
//    to two, so long zero-padded numbers still fit in buf while "0000x12" is
//    not turned into "0x12".
//  - On success *np is updated to the length of the string stored in buf.
//
// On rejection (empty input, disallowed or nothing-but whitespace, or a
// number too long for buf) the function returns "" and leaves *np unchanged;
// callers detect this because strtoxxx("") consumes nothing while *np is
// still > 0.
//
// Fixes relative to the previous version:
//  - isspace() is given an unsigned char value; passing a negative char is
//    undefined behaviour;
//  - input consisting only of whitespace is rejected. Previously it yielded an
//    empty buffer with *np == 0, which the float/double parsers accepted as 0.
static const char* TerminateNumber(
    char* buf, size_t nbuf, const char* str, size_t* np, bool accept_spaces) {
  size_t n = *np;
  if (n == 0)
    return "";
  if (isspace(static_cast<unsigned char>(*str))) {
    if (!accept_spaces) {
      return "";
    }
    while (n > 0 && isspace(static_cast<unsigned char>(*str))) {
      n--;
      str++;
    }
    if (n == 0) {
      return "";  // nothing but whitespace: not a number
    }
  }

  // Step over a leading '-' while collapsing zeros; it is put back below.
  bool neg = false;
  if (n >= 1 && str[0] == '-') {
    neg = true;
    n--;
    str++;
  }
  if (n >= 3 && str[0] == '0' && str[1] == '0') {
    while (n >= 3 && str[2] == '0') {
      n--;
      str++;
    }
  }
  if (neg) {
    // Make room for the '-': the byte before str is either the original '-'
    // or a dropped '0', and buf[0] is overwritten with '-' after the copy.
    n++;
    str--;
  }

  if (n > nbuf - 1)
    return "";
  memmove(buf, str, n);
  if (neg) {
    buf[0] = '-';
  }
  buf[n] = '\0';
  *np = n;
  return buf;
}
// Parse specialization for float. The input is copied into a local buffer by
// TerminateNumber() (leading spaces allowed) and converted with strtof().
// Fails on empty input, when strtof() does not consume the whole prepared
// string, or when strtof() sets errno. *dest is written only when non-NULL.
template<>
bool Parse(const char* str, size_t n, float* dest) {
  if (n == 0) {
    return false;
  }
  static const int kMaxLength = 200;
  char buf[kMaxLength + 1];
  str = TerminateNumber(buf, sizeof buf, str, &n, true);

  char* stop;
  errno = 0;
  const float value = strtof(str, &stop);
  const bool consumed_all = (stop == str + n);
  if (!consumed_all || errno != 0) {
    return false;
  }
  if (dest != NULL) {
    *dest = value;
  }
  return true;
}
// Parse specialization for double. The input is copied into a local buffer by
// TerminateNumber() (leading spaces allowed) and converted with strtod().
// Fails on empty input, when strtod() does not consume the whole prepared
// string, or when strtod() sets errno. *dest is written only when non-NULL.
template<>
bool Parse(const char* str, size_t n, double* dest) {
  if (n == 0) {
    return false;
  }
  static const int kMaxLength = 200;
  char buf[kMaxLength + 1];
  str = TerminateNumber(buf, sizeof buf, str, &n, true);

  char* stop;
  errno = 0;
  const double value = strtod(str, &stop);
  const bool consumed_all = (stop == str + n);
  if (!consumed_all || errno != 0) {
    return false;
  }
  if (dest != NULL) {
    *dest = value;
  }
  return true;
}
// Parse specialization for long in the given radix. The input is prepared by
// TerminateNumber() (leading spaces rejected) and converted with strtol().
// Fails on empty input, when strtol() does not consume the whole prepared
// string, or when strtol() sets errno. *dest is written only when non-NULL.
template<>
bool Parse(const char* str, size_t n, long* dest, int radix) {
  if (n == 0) {
    return false;
  }
  char buf[kMaxNumberLength + 1];
  str = TerminateNumber(buf, sizeof buf, str, &n, false);

  char* stop;
  errno = 0;
  const long value = strtol(str, &stop, radix);
  const bool consumed_all = (stop == str + n);
  if (!consumed_all || errno != 0) {
    return false;
  }
  if (dest != NULL) {
    *dest = value;
  }
  return true;
}
// Parse specialization for unsigned long in the given radix. The input is
// prepared by TerminateNumber() (leading spaces rejected); a leading '-' is
// rejected before strtoul() is called. Fails on empty input, when strtoul()
// does not consume the whole prepared string, or when it sets errno.
// *dest is written only when non-NULL.
template<>
bool Parse(const char* str, size_t n, unsigned long* dest, int radix) {
  if (n == 0) {
    return false;
  }
  char buf[kMaxNumberLength + 1];
  str = TerminateNumber(buf, sizeof buf, str, &n, false);
  const bool negative = (str[0] == '-');
  if (negative) {
    return false;
  }

  char* stop;
  errno = 0;
  const unsigned long value = strtoul(str, &stop, radix);
  const bool consumed_all = (stop == str + n);
  if (!consumed_all || errno != 0) {
    return false;
  }
  if (dest != NULL) {
    *dest = value;
  }
  return true;
}
// Parse specialization for short: parses as long, then fails if the value
// does not survive a round trip through short. *dest is written only when
// non-NULL.
template<>
bool Parse(const char* str, size_t n, short* dest, int radix) {
  long wide;
  if (!Parse(str, n, &wide, radix)) {
    return false;
  }
  const short narrow = static_cast<short>(wide);
  if (narrow != wide) {
    return false;  // out of range for short
  }
  if (dest != NULL) {
    *dest = narrow;
  }
  return true;
}
// Parse specialization for unsigned short: parses as unsigned long, then
// fails if the value does not survive a round trip through unsigned short.
// *dest is written only when non-NULL.
template<>
bool Parse(const char* str, size_t n, unsigned short* dest, int radix) {
  unsigned long wide;
  if (!Parse(str, n, &wide, radix)) {
    return false;
  }
  const unsigned short narrow = static_cast<unsigned short>(wide);
  if (narrow != wide) {
    return false;  // out of range for unsigned short
  }
  if (dest != NULL) {
    *dest = narrow;
  }
  return true;
}
// Parse specialization for int: parses as long, then fails if the value does
// not survive a round trip through int. *dest is written only when non-NULL.
template<>
bool Parse(const char* str, size_t n, int* dest, int radix) {
  long wide;
  if (!Parse(str, n, &wide, radix)) {
    return false;
  }
  const int narrow = static_cast<int>(wide);
  if (narrow != wide) {
    return false;  // out of range for int
  }
  if (dest != NULL) {
    *dest = narrow;
  }
  return true;
}
// Parse specialization for unsigned int: parses as unsigned long, then fails
// if the value does not survive a round trip through unsigned int.
// *dest is written only when non-NULL.
template<>
bool Parse(const char* str, size_t n, unsigned int* dest, int radix) {
  unsigned long wide;
  if (!Parse(str, n, &wide, radix)) {
    return false;
  }
  const unsigned int narrow = static_cast<unsigned int>(wide);
  if (narrow != wide) {
    return false;  // out of range for unsigned int
  }
  if (dest != NULL) {
    *dest = narrow;
  }
  return true;
}
// Parse specialization for long long in the given radix. The input is
// prepared by TerminateNumber() (leading spaces rejected) and converted with
// strtoll(). Fails on empty input, when strtoll() does not consume the whole
// prepared string, or when it sets errno. *dest is written only when non-NULL.
template<>
bool Parse(const char* str, size_t n, long long* dest, int radix) {
  if (n == 0) {
    return false;
  }
  char buf[kMaxNumberLength + 1];
  str = TerminateNumber(buf, sizeof buf, str, &n, false);

  char* stop;
  errno = 0;
  const long long value = strtoll(str, &stop, radix);
  const bool consumed_all = (stop == str + n);
  if (!consumed_all || errno != 0) {
    return false;
  }
  if (dest != NULL) {
    *dest = value;
  }
  return true;
}
// Parse specialization for unsigned long long in the given radix. The input
// is prepared by TerminateNumber() (leading spaces rejected); a leading '-' is
// rejected before strtoull() is called. Fails on empty input, when strtoull()
// does not consume the whole prepared string, or when it sets errno.
// *dest is written only when non-NULL.
template<>
bool Parse(const char* str, size_t n, unsigned long long* dest, int radix) {
  if (n == 0) {
    return false;
  }
  char buf[kMaxNumberLength + 1];
  str = TerminateNumber(buf, sizeof buf, str, &n, false);
  const bool negative = (str[0] == '-');
  if (negative) {
    return false;
  }

  char* stop;
  errno = 0;
  const unsigned long long value = strtoull(str, &stop, radix);
  const bool consumed_all = (stop == str + n);
  if (!consumed_all || errno != 0) {
    return false;
  }
  if (dest != NULL) {
    *dest = value;
  }
  return true;
}
}
namespace hooks {
// Per-thread pointer to an RE2 object; RE2::Match() stores `this` here before
// it starts searching. Only present when RE2_HAVE_THREAD_LOCAL is defined.
#ifdef RE2_HAVE_THREAD_LOCAL
thread_local const RE2* context = NULL;
#endif
// Holder for one hook callback pointer of type T*. Store() publishes a new
// callback with release ordering and Load() reads it with acquire ordering.
template<typename T>
union Hook {
void Store(T* cb) { cb_.store(cb, std::memory_order_release); }
T* Load() const { return cb_.load(std::memory_order_acquire); }
// MSVC (non-clang) only: an extra plain-pointer member is declared ahead of
// the atomic, after asserting that atomic pointers are always lock-free.
// NOTE(review): presumably so that the brace initializer in DEFINE_HOOK
// constant-initializes the union through this member - confirm.
#if !defined(__clang__) && defined(_MSC_VER)
static_assert(ATOMIC_POINTER_LOCK_FREE == 2, "std::atomic<T*> must be always lock-free");
T* cb_for_constinit_;
#endif
std::atomic<T*> cb_;
};
// Default hook callback: accepts the event argument and ignores it.
template<typename T>
static void DoNothing(const T&) {}
// DEFINE_HOOK(type, name) expands to:
//  - a file-local Hook<typeCallback> named name_hook, initialized with
//    DoNothing<type>;
//  - SettypeHook(cb), which stores cb into that hook;
//  - GettypeHook(), which loads the current callback.
// It is instantiated below for the DFA state-cache-reset and DFA
// search-failure hooks and then undefined.
#define DEFINE_HOOK(type, name) \
static Hook<type##Callback> name##_hook = {{&DoNothing<type>}}; \
void Set##type##Hook(type##Callback* cb) { name##_hook.Store(cb); } \
type##Callback* Get##type##Hook() { return name##_hook.Load(); }
DEFINE_HOOK(DFAStateCacheReset, dfa_state_cache_reset)
DEFINE_HOOK(DFASearchFailure, dfa_search_failure)
#undef DEFINE_HOOK
} } }