#include <stdint.h>
#include <string.h>
#include <algorithm>
#include <map>
#include <string>
#include <vector>
#include "logging.h"
#include "pod_array.h"
#include "prog.h"
#include "sparse_set.h"
#include "stringpiece.h"
#include "strutil.h"
#include "utf.h"
#include "util.h"
#ifdef _MSC_VER
#endif
namespace lbug {
namespace regex {
struct OneState {
uint32_t matchcond; uint32_t action[1];
};
static const int kIndexShift = 16; static const int kEmptyShift = 6; static const int kRealCapShift = kEmptyShift + 1;
static const int kRealMaxCap = (kIndexShift - kRealCapShift) / 2 * 2;
static const int kCapShift = kRealCapShift - 2;
static const int kMaxCap = kRealMaxCap + 2;
static const uint32_t kMatchWins = 1 << kEmptyShift;
static const uint32_t kCapMask = ((1 << kRealMaxCap) - 1) << kRealCapShift;
static const uint32_t kImpossible = kEmptyWordBoundary | kEmptyNonWordBoundary;
void OnePass_Checks() {
static_assert(
(1 << kEmptyShift) - 1 == kEmptyAllFlags, "kEmptyShift disagrees with kEmptyAllFlags");
static_assert(
kMaxCap == Prog::kMaxOnePassCapture * 2, "kMaxCap disagrees with kMaxOnePassCapture");
}
static bool Satisfy(uint32_t cond, const StringPiece& context, const char* p) {
uint32_t satisfied = Prog::EmptyFlags(context, p);
if (cond & kEmptyAllFlags & ~satisfied)
return false;
return true;
}
static void ApplyCaptures(uint32_t cond, const char* p, const char** cap, int ncap) {
for (int i = 2; i < ncap; i++)
if (cond & (1 << kCapShift << i))
cap[i] = p;
}
static inline OneState* IndexToNode(uint8_t* nodes, int statesize, int nodeindex) {
return reinterpret_cast<OneState*>(nodes + statesize * nodeindex);
}
bool Prog::SearchOnePass(const StringPiece& text, const StringPiece& const_context, Anchor anchor,
MatchKind kind, StringPiece* match, int nmatch) {
if (anchor != kAnchored && kind != kFullMatch) {
LOG(DFATAL) << "Cannot use SearchOnePass for unanchored matches.";
return false;
}
int ncap = 2 * nmatch;
if (ncap < 2)
ncap = 2;
const char* cap[kMaxCap];
for (int i = 0; i < ncap; i++)
cap[i] = NULL;
const char* matchcap[kMaxCap];
for (int i = 0; i < ncap; i++)
matchcap[i] = NULL;
StringPiece context = const_context;
if (context.begin() == NULL)
context = text;
if (anchor_start() && context.begin() != text.begin())
return false;
if (anchor_end() && context.end() != text.end())
return false;
if (anchor_end())
kind = kFullMatch;
uint8_t* nodes = onepass_nodes_.data();
int statesize = sizeof(OneState) + bytemap_range() * sizeof(uint32_t);
OneState* state = IndexToNode(nodes, statesize, 0);
uint8_t* bytemap = bytemap_;
const char* bp = text.begin();
const char* ep = text.end();
const char* p;
bool matched = false;
matchcap[0] = bp;
cap[0] = bp;
uint32_t nextmatchcond = state->matchcond;
for (p = bp; p < ep; p++) {
int c = bytemap[*p & 0xFF];
uint32_t matchcond = nextmatchcond;
uint32_t cond = state->action[c];
if ((cond & kEmptyAllFlags) == 0 || Satisfy(cond, context, p)) {
uint32_t nextindex = cond >> kIndexShift;
state = IndexToNode(nodes, statesize, nextindex);
nextmatchcond = state->matchcond;
} else {
state = NULL;
nextmatchcond = kImpossible;
}
if (kind == kFullMatch)
goto skipmatch;
if (matchcond == kImpossible)
goto skipmatch;
if ((cond & kMatchWins) == 0 && (nextmatchcond & kEmptyAllFlags) == 0)
goto skipmatch;
if ((matchcond & kEmptyAllFlags) == 0 || Satisfy(matchcond, context, p)) {
for (int i = 2; i < 2 * nmatch; i++)
matchcap[i] = cap[i];
if (nmatch > 1 && (matchcond & kCapMask))
ApplyCaptures(matchcond, p, matchcap, ncap);
matchcap[1] = p;
matched = true;
if (kind == kFirstMatch && (cond & kMatchWins))
goto done;
}
skipmatch:
if (state == NULL)
goto done;
if ((cond & kCapMask) && nmatch > 1)
ApplyCaptures(cond, p, cap, ncap);
}
{
uint32_t matchcond = state->matchcond;
if (matchcond != kImpossible &&
((matchcond & kEmptyAllFlags) == 0 || Satisfy(matchcond, context, p))) {
if (nmatch > 1 && (matchcond & kCapMask))
ApplyCaptures(matchcond, p, cap, ncap);
for (int i = 2; i < ncap; i++)
matchcap[i] = cap[i];
matchcap[1] = p;
matched = true;
}
}
done:
if (!matched)
return false;
for (int i = 0; i < nmatch; i++)
match[i] = StringPiece(
matchcap[2 * i], static_cast<size_t>(matchcap[2 * i + 1] - matchcap[2 * i]));
return true;
}
typedef SparseSet Instq;
static bool AddQ(Instq* q, int id) {
if (id == 0)
return true;
if (q->contains(id))
return false;
q->insert(id);
return true;
}
struct InstCond {
int id;
uint32_t cond;
};
bool Prog::IsOnePass() {
if (did_onepass_)
return onepass_nodes_.data() != NULL;
did_onepass_ = true;
if (start() == 0) return false;
int maxnodes = 2 + inst_count(kInstByteRange);
int statesize = sizeof(OneState) + bytemap_range() * sizeof(uint32_t);
if (maxnodes >= 65000 || dfa_mem_ / 4 / statesize < maxnodes)
return false;
int stacksize = inst_count(kInstCapture) + inst_count(kInstEmptyWidth) + inst_count(kInstNop) +
1; PODArray<InstCond> stack(stacksize);
int size = this->size();
PODArray<int> nodebyid(size); memset(nodebyid.data(), 0xFF, size * sizeof nodebyid[0]);
std::vector<uint8_t> nodes;
Instq tovisit(size), workq(size);
AddQ(&tovisit, start());
nodebyid[start()] = 0;
int nalloc = 1;
nodes.insert(nodes.end(), statesize, 0);
for (Instq::iterator it = tovisit.begin(); it != tovisit.end(); ++it) {
int id = *it;
int nodeindex = nodebyid[id];
OneState* node = IndexToNode(nodes.data(), statesize, nodeindex);
for (int b = 0; b < bytemap_range_; b++)
node->action[b] = kImpossible;
node->matchcond = kImpossible;
workq.clear();
bool matched = false;
int nstack = 0;
stack[nstack].id = id;
stack[nstack++].cond = 0;
while (nstack > 0) {
int id = stack[--nstack].id;
uint32_t cond = stack[nstack].cond;
Loop:
Prog::Inst* ip = inst(id);
switch (ip->opcode()) {
default:
LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
break;
case kInstAltMatch:
DCHECK(!ip->last());
if (!AddQ(&workq, id + 1))
goto fail;
id = id + 1;
goto Loop;
case kInstByteRange: {
int nextindex = nodebyid[ip->out()];
if (nextindex == -1) {
if (nalloc >= maxnodes) {
goto fail;
}
nextindex = nalloc;
AddQ(&tovisit, ip->out());
nodebyid[ip->out()] = nalloc;
nalloc++;
nodes.insert(nodes.end(), statesize, 0);
node = IndexToNode(nodes.data(), statesize, nodeindex);
}
for (int c = ip->lo(); c <= ip->hi(); c++) {
int b = bytemap_[c];
while (c < 256 - 1 && bytemap_[c + 1] == b)
c++;
uint32_t act = node->action[b];
uint32_t newact = (nextindex << kIndexShift) | cond;
if (matched)
newact |= kMatchWins;
if ((act & kImpossible) == kImpossible) {
node->action[b] = newact;
} else if (act != newact) {
goto fail;
}
}
if (ip->foldcase()) {
Rune lo = std::max<Rune>(ip->lo(), 'a') + 'A' - 'a';
Rune hi = std::min<Rune>(ip->hi(), 'z') + 'A' - 'a';
for (int c = lo; c <= hi; c++) {
int b = bytemap_[c];
while (c < 256 - 1 && bytemap_[c + 1] == b)
c++;
uint32_t act = node->action[b];
uint32_t newact = (nextindex << kIndexShift) | cond;
if (matched)
newact |= kMatchWins;
if ((act & kImpossible) == kImpossible) {
node->action[b] = newact;
} else if (act != newact) {
goto fail;
}
}
}
if (ip->last())
break;
if (!AddQ(&workq, id + 1))
goto fail;
id = id + 1;
goto Loop;
}
case kInstCapture:
case kInstEmptyWidth:
case kInstNop:
if (!ip->last()) {
if (!AddQ(&workq, id + 1))
goto fail;
stack[nstack].id = id + 1;
stack[nstack++].cond = cond;
}
if (ip->opcode() == kInstCapture && ip->cap() < kMaxCap)
cond |= (1 << kCapShift) << ip->cap();
if (ip->opcode() == kInstEmptyWidth)
cond |= ip->empty();
if (!AddQ(&workq, ip->out())) {
goto fail;
}
id = ip->out();
goto Loop;
case kInstMatch:
if (matched) {
goto fail;
}
matched = true;
node->matchcond = cond;
if (ip->last())
break;
if (!AddQ(&workq, id + 1))
goto fail;
id = id + 1;
goto Loop;
case kInstFail:
break;
}
}
}
dfa_mem_ -= nalloc * statesize;
onepass_nodes_ = PODArray<uint8_t>(nalloc * statesize);
memmove(onepass_nodes_.data(), nodes.data(), nalloc * statesize);
return true;
fail:
return false;
}
} }