#include <limits.h>
#include "lua.h"
#include "lauxlib.h"
#include "lptypes.h"
#include "lpcode.h"
#include "buf.h"
#include "rplx.h"
#include "vm.h"
#include "lpprint.h"
#define NOINST -1
static const Charset fullset_ =
{{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}};
static const Charset *fullset = &fullset_;
static Opcode charsettype (const byte *cs, int *c) {
int count = 0;
int i;
int candidate = -1;
for (i = 0; i < CHARSETSIZE; i++) {
int b = cs[i];
if (b == 0) {
if (count > 1)
return ISet;
}
else if (b == 0xFF) {
if (count < (i * BITSPERCHAR))
return ISet;
else count += BITSPERCHAR;
}
else if ((b & (b - 1)) == 0) {
if (count > 0)
return ISet;
else {
count++;
candidate = i;
}
}
else return ISet;
}
switch (count) {
case 0: return IFail;
case 1: {
int b = cs[candidate];
*c = candidate * BITSPERCHAR;
if ((b & 0xF0) != 0) { *c += 4; b >>= 4; }
if ((b & 0x0C) != 0) { *c += 2; b >>= 2; }
if ((b & 0x02) != 0) { *c += 1; }
return IChar;
}
default: {
assert(count == CHARSETSIZE * BITSPERCHAR);
return IAny;
}
}
}
static void cs_complement (Charset *cs) {
loopset(i, cs->cs[i] = ~cs->cs[i]);
}
static int cs_equal (const byte *cs1, const byte *cs2) {
loopset(i, if (cs1[i] != cs2[i]) return 0);
return 1;
}
static int cs_disjoint (const Charset *cs1, const Charset *cs2) {
loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;)
return 1;
}
int tocharset (TTree *tree, Charset *cs) {
switch (tree->tag) {
case TSet: {
loopset(i, cs->cs[i] = treebuffer(tree)[i]);
return 1;
}
case TChar: {
assert(0 <= tree->u.n && tree->u.n <= UCHAR_MAX);
loopset(i, cs->cs[i] = 0);
setchar(cs->cs, tree->u.n);
return 1;
}
case TAny: {
loopset(i, cs->cs[i] = 0xFF);
return 1;
}
default: return 0;
}
}
int hascaptures (TTree *tree) {
tailcall:
switch (tree->tag) {
case TCapture: case TRunTime: case TBackref:
return 1;
case TCall:
tree = sib2(tree); goto tailcall;
case TOpenCall: assert(0);
default: {
switch (numsiblings[tree->tag]) {
case 1:
tree = sib1(tree); goto tailcall;
case 2:
if (hascaptures(sib1(tree))) return 1;
tree = sib2(tree); goto tailcall;
default: assert(numsiblings[tree->tag] == 0); return 0;
}
}
}
}
int checkaux (TTree *tree, int pred) {
tailcall:
switch (tree->tag) {
case TChar: case TSet: case TAny:
case TFalse: case TOpenCall:
return 0;
case TRep: case TTrue: case THalt:
return 1;
case TNot: case TBehind:
if (pred == PEnofail) return 0;
else return 1;
case TAnd:
if (pred == PEnullable) return 1;
tree = sib1(tree); goto tailcall;
case TRunTime:
assert(0);
if (pred == PEnofail) return 0;
tree = sib1(tree); goto tailcall;
case TBackref:
if (pred == PEnofail) return 0;
tree = sib1(tree); goto tailcall;
case TSeq:
if (!checkaux(sib1(tree), pred)) return 0;
tree = sib2(tree); goto tailcall;
case TChoice:
if (checkaux(sib2(tree), pred)) return 1;
tree = sib1(tree); goto tailcall;
case TCapture: case TGrammar: case TRule:
tree = sib1(tree); goto tailcall;
case TCall:
tree = sib2(tree); goto tailcall;
default: assert(0); return 0;
}
}
int fixedlenx (TTree *tree, int count, int len) {
tailcall:
switch (tree->tag) {
case TChar: case TSet: case TAny:
return len + 1;
case TFalse: case TTrue: case TNot: case TAnd: case TBehind: case THalt:
return len;
case TRep: case TRunTime: case TOpenCall: case TBackref:
return -1;
case TCapture: case TRule: case TGrammar:
tree = sib1(tree); goto tailcall;
case TCall:
if (count++ >= MAXRULES)
return -1;
tree = sib2(tree); goto tailcall;
case TSeq: {
len = fixedlenx(sib1(tree), count, len);
if (len < 0) return -1;
tree = sib2(tree); goto tailcall;
}
case TChoice: {
int n1, n2;
n1 = fixedlenx(sib1(tree), count, len);
if (n1 < 0) return -1;
n2 = fixedlenx(sib2(tree), count, len);
if (n1 == n2) return n1;
else return -1;
}
default: assert(0); return 0;
};
}
static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
tailcall:
switch (tree->tag) {
case TChar: case TSet: case TAny: {
tocharset(tree, firstset);
return 0;
}
case TTrue: {
loopset(i, firstset->cs[i] = follow->cs[i]);
return 1;
}
case TFalse: {
loopset(i, firstset->cs[i] = 0);
return 0;
}
case THalt: {
loopset(i, firstset->cs[i] = follow->cs[i]);
return 1;
}
case TChoice: {
Charset csaux;
int e1 = getfirst(sib1(tree), follow, firstset);
int e2 = getfirst(sib2(tree), follow, &csaux);
loopset(i, firstset->cs[i] |= csaux.cs[i]);
return e1 | e2;
}
case TSeq: {
if (!nullable(sib1(tree))) {
tree = sib1(tree); follow = fullset; goto tailcall;
}
else {
Charset csaux;
int e2 = getfirst(sib2(tree), follow, &csaux);
int e1 = getfirst(sib1(tree), &csaux, firstset);
if (e1 == 0) return 0;
else if ((e1 | e2) & 2)
return 2;
else return e2;
}
}
case TRep: {
getfirst(sib1(tree), follow, firstset);
loopset(i, firstset->cs[i] |= follow->cs[i]);
return 1;
}
case TCapture: case TGrammar: case TRule: {
tree = sib1(tree); goto tailcall;
}
case TRunTime: {
assert(0);
int e = getfirst(sib1(tree), fullset, firstset);
if (e) return 2;
else return 0;
}
case TBackref: {
return 2;
}
case TCall: {
tree = sib2(tree); goto tailcall;
}
case TAnd: {
int e = getfirst(sib1(tree), follow, firstset);
loopset(i, firstset->cs[i] &= follow->cs[i]);
return e;
}
case TNot: {
if (tocharset(sib1(tree), firstset)) {
cs_complement(firstset);
return 1;
}
}
case TBehind: {
int e = getfirst(sib1(tree), follow, firstset);
loopset(i, firstset->cs[i] = follow->cs[i]);
return e | 1;
}
default: assert(0); return 0;
}
}
static int headfail (TTree *tree) {
tailcall:
switch (tree->tag) {
case TChar: case TSet: case TAny: case TFalse:
return 1;
case TTrue: case TRep: case TRunTime: case TNot:
case TBehind: case THalt:
return 0;
case TCapture: case TGrammar: case TRule: case TAnd:
tree = sib1(tree); goto tailcall;
case TCall:
tree = sib2(tree); goto tailcall;
case TSeq:
if (!nofail(sib2(tree))) return 0;
tree = sib1(tree); goto tailcall;
case TChoice:
if (!headfail(sib1(tree))) return 0;
tree = sib2(tree); goto tailcall;
case TBackref:
return 0;
default: assert(0); return 0;
}
}
static int needfollow (TTree *tree) {
tailcall:
switch (tree->tag) {
case TChar: case TSet: case TAny:
case TFalse: case TTrue: case TAnd: case TNot: case THalt:
case TRunTime: case TBackref: case TGrammar: case TCall: case TBehind:
return 0;
case TChoice: case TRep:
return 1;
case TCapture:
tree = sib1(tree); goto tailcall;
case TSeq:
tree = sib2(tree); goto tailcall;
default: assert(0); return 0;
}
}
typedef struct CompileState {
Pattern *p;
int ncode;
lua_State *L;
} CompileState;
static int codegen (CompileState *compst, TTree *tree, int opt, int tt,
const Charset *fl);
void realloccode (lua_State *L, Pattern *p, int nsize) {
void *ud;
lua_Alloc f = lua_getallocf(L, &ud);
void *newblock = f(ud, p->code, p->codesize * sizeof(Instruction),
nsize * sizeof(Instruction));
if (newblock == NULL && nsize > 0)
luaL_error(L, "not enough memory");
p->code = (Instruction *)newblock;
p->codesize = nsize;
}
static int nextinstruction (CompileState *compst) {
int size = compst->p->codesize;
if (compst->ncode == size)
realloccode(compst->L, compst->p, size * 2);
return compst->ncode++;
}
#define getinstr(cs,i) ((cs)->p->code[i])
static int addinstruction1 (CompileState *compst, Opcode op) {
int i = nextinstruction(compst);
setopcode(&getinstr(compst, i), op);
return i;
}
static int addinstruction (CompileState *compst, Opcode op) {
int i = addinstruction1(compst, op);
if (!((op==ISet) || (op==ISpan) || (sizei(&getinstr(compst, i)) == 1))) {
printf("%s:%d: opcode %d (%s)\n", __FILE__, __LINE__, op, OPCODE_NAME(op));
assert(0);
}
return i;
}
static int addinstruction_char (CompileState *compst, Opcode op, int c) {
int i = addinstruction(compst, op);
setichar(&getinstr(compst, i), c);
assert(opcode(&getinstr(compst, i)) == op);
assert(ichar(&getinstr(compst, i)) == (c & 0xFF));
assert(!(c & 0xFFFFFF00));
assert(sizei(&getinstr(compst, i)) == 1);
if (!(op==IChar)) {
printf("%s:%d: opcode %d (%s)\n", __FILE__, __LINE__, op, OPCODE_NAME(op));
assert(0);
}
return i;
}
static int addinstruction_aux (CompileState *compst, Opcode op, int k) {
int i = addinstruction(compst, op);
setindex(&getinstr(compst, i), k);
assert(opcode(&getinstr(compst, i)) == op);
assert(index(&getinstr(compst, i)) == (k & 0xFFFFFF));
assert(!(k & 0xFF000000));
assert(sizei(&getinstr(compst, i)) == 1);
return i;
}
static int addinstruction_offset (CompileState *compst, Opcode op, int offset) {
int i = addinstruction1(compst, op);
nextinstruction(compst);
setaddr(&getinstr(compst, i), offset);
assert(opcode(&getinstr(compst, i)) == op);
assert(addr(&getinstr(compst, i)) == offset);
if (! ((op == ITestSet) || (sizei(&getinstr(compst, i)) == 2)) ) {
printf("%s:%d: opcode %d (%s)\n", __FILE__, __LINE__, op, OPCODE_NAME(op));
assert(0);
}
return i;
}
static int addinstcap (CompileState *compst, Opcode op, int cap, int idx) {
int i;
i = addinstruction_offset(compst, op, cap);
setindex(&getinstr(compst, i), idx);
assert(index(&getinstr(compst, i)) == idx);
assert(!(cap & 0xFFFFFF00));
assert(!(idx & 0xFF000000));
return i;
}
#define gethere(compst) ((compst)->ncode)
static void jumptothere (CompileState *compst, int instruction, int target) {
if (instruction >= 0) {
assert(target != instruction);
int op = opcode(&getinstr(compst, instruction)); UNUSED(op);
setaddr(&getinstr(compst, instruction), target - instruction);
assert(opcode(&getinstr(compst, instruction)) == op);
assert(addr(&getinstr(compst, instruction)) == (target - instruction));
}
}
static void jumptohere (CompileState *compst, int instruction) {
jumptothere(compst, instruction, gethere(compst));
}
static void codechar (CompileState *compst, int c, int tt) {
Instruction *inst = &getinstr(compst, tt);
if ((tt >= 0) && opcode(inst) == ITestChar && ichar(inst) == c)
addinstruction(compst, IAny);
else {
addinstruction_char(compst, IChar, c);
}
}
static void addcharset (CompileState *compst, const byte *cs) {
int p = gethere(compst);
int i;
for (i = 0; i < (int)CHARSETINSTSIZE - 1; i++)
nextinstruction(compst);
loopset(j, getinstr(compst, p).buff[j] = cs[j]);
}
static void codecharset (CompileState *compst, const byte *cs, int tt) {
int c = 0;
Opcode op = charsettype(cs, &c);
switch (op) {
case IChar: codechar(compst, c, tt); break;
case ISet: {
if (tt >= 0 && opcode(&getinstr(compst, tt)) == ITestSet &&
cs_equal(cs, getinstr(compst, tt + 1).buff))
addinstruction(compst, IAny);
else {
addinstruction(compst, ISet);
addcharset(compst, cs);
}
break;
}
default: addinstruction_char(compst, op, c); break;
}
}
static int codetestset (CompileState *compst, Charset *cs, int e) {
if (e) return NOINST;
else {
int c = 0;
Opcode op = charsettype(cs->cs, &c);
switch (op) {
case IFail: return addinstruction_offset(compst, IJmp, 0);
case IAny: return addinstruction_offset(compst, ITestAny, 0);
case IChar: {
int i = addinstruction_offset(compst, ITestChar, 0);
setichar(&getinstr(compst, i), c);
return i;
}
case ISet: {
int i = addinstruction_offset(compst, ITestSet, 0);
addcharset(compst, cs->cs);
return i;
}
default: assert(0); return 0;
}
}
}
static int finaltarget (Instruction *code, int i) {
Instruction *pc = &code[i];
while (opcode(pc) == IJmp) pc += addr(pc);
return pc - code;
}
static int finallabel (Instruction *code, int i) {
Instruction *pc = &code[i];
assert(addr(pc));
return finaltarget(code, i + addr(pc));
}
static int codebehind (CompileState *compst, TTree *tree) {
if (tree->u.n > 0)
addinstruction_aux(compst, IBehind, tree->u.n);
return codegen(compst, sib1(tree), 0, NOINST, fullset);
}
static int codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
const Charset *fl) {
int err;
int haltp2 = (p2->tag == THalt);
int emptyp2 = (p2->tag == TTrue);
Charset cs1, cs2;
int e1 = getfirst(p1, fullset, &cs1);
if (!haltp2 && (headfail(p1) ||
(!e1 && (getfirst(p2, fl, &cs2), cs_disjoint(&cs1, &cs2))))) {
int test = codetestset(compst, &cs1, 0);
int jmp = NOINST;
err = codegen(compst, p1, 0, test, fl);
if (err) return err;
if (!emptyp2)
jmp = addinstruction_offset(compst, IJmp, 0);
jumptohere(compst, test);
err = codegen(compst, p2, opt, NOINST, fl);
if (err) return err;
jumptohere(compst, jmp);
}
else if (!haltp2 && opt && emptyp2) {
jumptohere(compst, addinstruction_offset(compst, IPartialCommit, 0));
err = codegen(compst, p1, 1, NOINST, fullset);
if (err) return err;
}
else {
int pcommit;
int test = codetestset(compst, &cs1, e1);
int pchoice = addinstruction_offset(compst, IChoice, 0);
err = codegen(compst, p1, emptyp2, test, fullset);
if (err) return err;
pcommit = addinstruction_offset(compst, ICommit, 0);
jumptohere(compst, pchoice);
jumptohere(compst, test);
err = codegen(compst, p2, opt, NOINST, fl);
if (err) return err;
jumptohere(compst, pcommit);
}
return 0;
}
static int codeand (CompileState *compst, TTree *tree, int tt) {
int err;
int n = fixedlen(tree);
if (n >= 0 && n <= MAXBEHIND && !hascaptures(tree)) {
err = codegen(compst, tree, 0, tt, fullset);
if (err) return err;
if (n > 0)
addinstruction_aux(compst, IBehind, n);
}
else {
int pcommit;
int pchoice = addinstruction_offset(compst, IChoice, 0);
err = codegen(compst, tree, 0, tt, fullset);
if (err) return err;
pcommit = addinstruction_offset(compst, IBackCommit, 0);
jumptohere(compst, pchoice);
addinstruction(compst, IFail);
jumptohere(compst, pcommit);
}
return 0;
}
static int codecapture (CompileState *compst, TTree *tree, int tt,
const Charset *fl) {
addinstcap(compst, IOpenCapture, tree->cap, tree->key);
if (tree->cap == Crosieconst) {
assert(sib1(tree)->tag == TTrue);
addinstruction_aux(compst, ICloseConstCapture, tree->u.n);
}
else {
int err = codegen(compst, sib1(tree), 0, tt, fl);
if (err) return err;
addinstruction(compst, ICloseCapture);
}
return 0;
}
static void codebackref (CompileState *compst, TTree *tree) {
addinstruction_aux(compst, IBackref, tree->key);
}
static int coderep (CompileState *compst, TTree *tree, int opt,
const Charset *fl) {
Charset st;
int err;
if (tocharset(tree, &st)) {
addinstruction(compst, ISpan);
addcharset(compst, st.cs);
}
else {
int e1 = getfirst(tree, fullset, &st);
if (headfail(tree) || (!e1 && cs_disjoint(&st, fl))) {
int jmp;
int test = codetestset(compst, &st, 0);
err = codegen(compst, tree, 0, test, fullset);
if (err) return err;
jmp = addinstruction_offset(compst, IJmp, 0);
jumptohere(compst, test);
jumptothere(compst, jmp, test);
}
else {
int commit, l2;
int test = codetestset(compst, &st, e1);
int pchoice = NOINST;
if (opt)
jumptohere(compst, addinstruction_offset(compst, IPartialCommit, 0));
else
pchoice = addinstruction_offset(compst, IChoice, 0);
l2 = gethere(compst);
err = codegen(compst, tree, 0, NOINST, fullset);
if (err) return err;
commit = addinstruction_offset(compst, IPartialCommit, 0);
jumptothere(compst, commit, l2);
jumptohere(compst, pchoice);
jumptohere(compst, test);
}
}
return 0;
}
static int codenot (CompileState *compst, TTree *tree) {
Charset st;
int e = getfirst(tree, fullset, &st);
int test = codetestset(compst, &st, e);
if (headfail(tree))
addinstruction(compst, IFail);
else {
int pchoice = addinstruction_offset(compst, IChoice, 0);
int err = codegen(compst, tree, 0, NOINST, fullset);
if (err) return err;
addinstruction(compst, IFailTwice);
jumptohere(compst, pchoice);
}
jumptohere(compst, test);
return 0;
}
static void correctcalls (CompileState *compst, int *positions,
int from, int to) {
int i;
Instruction *inst, *prev_inst, *final_target;
for (i = from; i < to; i += sizei(&getinstr(compst, i))) {
inst = &getinstr(compst, i);
if (opcode(inst) == IOpenCall) {
int n = addr(inst);
int rulepos = positions[n];
prev_inst = &getinstr(compst, rulepos - 1);
assert(rulepos == from || opcode(prev_inst) == IRet); UNUSED(prev_inst);
int ft = finaltarget(compst->p->code, i + 2);
final_target = &getinstr(compst, ft);
if (opcode(final_target) == IRet)
setopcode(inst, IJmp);
else {
setopcode(inst, ICall);
}
jumptothere(compst, i, rulepos);
assert(opcode(inst) == ((opcode(final_target)==IRet) ? IJmp : ICall));
assert(addr(inst) == (rulepos - i));
assert(addr(inst));
}
}
assert(i == to);
}
static int codegrammar (CompileState *compst, TTree *grammar) {
int positions[MAXRULES];
int rulenumber = 0;
TTree *rule;
int firstcall = addinstruction_offset(compst, ICall, 0);
int jumptoend = addinstruction_offset(compst, IJmp, 0);
int start = gethere(compst);
jumptohere(compst, firstcall);
for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
positions[rulenumber++] = gethere(compst);
int err = codegen(compst, sib1(rule), 0, NOINST, fullset);
if (err) return err;
addinstruction(compst, IRet);
}
assert(rule->tag == TTrue);
jumptohere(compst, jumptoend);
correctcalls(compst, positions, start, gethere(compst));
return 0;
}
static void codecall (CompileState *compst, TTree *call) {
addinstruction_offset(compst, IOpenCall, sib2(call)->cap);
assert(sib2(call)->tag == TRule);
}
static int codeseq1 (CompileState *compst, TTree *p1, TTree *p2,
int *tt, const Charset *fl) {
int err;
if (needfollow(p1)) {
Charset fl1;
getfirst(p2, fl, &fl1);
err = codegen(compst, p1, 0, *tt, &fl1);
if (err) return err;
} else {
err = codegen(compst, p1, 0, *tt, fullset);
if (err) return err;
}
if (fixedlen(p1) != 0) {
*tt = NOINST;
return 0;
} else {
return 0;
}
}
static int codegen (CompileState *compst, TTree *tree, int opt, int tt,
const Charset *fl) {
int err;
tailcall:
assert(tree != NULL);
switch (tree->tag) {
case TChar: codechar(compst, tree->u.n, tt); break;
case TAny: addinstruction(compst, IAny); break;
case TSet: codecharset(compst, treebuffer(tree), tt); break;
case TTrue: break;
case TFalse: addinstruction(compst, IFail); break;
case THalt: addinstruction(compst, IHalt); break;
case TChoice: return codechoice(compst, sib1(tree), sib2(tree), opt, fl); break;
case TRep: return coderep(compst, sib1(tree), opt, fl); break;
case TBehind: return codebehind(compst, tree); break;
case TNot: return codenot(compst, sib1(tree)); break;
case TAnd: return codeand(compst, sib1(tree), tt); break;
case TCapture: return codecapture(compst, tree, tt, fl); break;
case TBackref: codebackref(compst, tree); break;
case TGrammar: return codegrammar(compst, tree); break;
case TCall: codecall(compst, tree); break;
case TSeq: {
err = codeseq1(compst, sib1(tree), sib2(tree), &tt, fl);
if (err) return err;
tree = sib2(tree); goto tailcall;
}
case TNoTree: return 1;
case TRunTime: return 2;
default: return 3;
}
return 0;
}
static void peephole (CompileState *compst) {
Instruction *code = compst->p->code;
Instruction *target_inst, *inst = NULL;
int i;
for (i = 0; i < compst->ncode; i += sizei(&code[i])) {
redo:
inst = &code[i];
switch (opcode(inst)) {
case IPartialCommit: case ITestAny:
case ICall: case IChoice:
case ICommit: case IBackCommit:
case ITestChar: case ITestSet: {
int final = finallabel(code, i);
jumptothere(compst, i, final);
break;
}
case IJmp: {
int ft = finaltarget(code, i);
assert( ft < compst->ncode );
target_inst = &code[ft];
switch (opcode(target_inst)) {
case IRet: case IFail: case IFailTwice:
case IHalt: case IEnd: {
code[i] = code[ft];
code[i+1].i.code = IAny;
break;
}
case ICommit: case IPartialCommit:
case IBackCommit: {
int fft = finallabel(code, ft);
assert( fft < compst->ncode );
code[i] = code[ft];
jumptothere(compst, i, fft);
goto redo;
}
case IOpenCall: {
assert(0); }
default: {
jumptothere(compst, i, ft);
break;
}}
break;
}
case IOpenCall: {
assert(0); }
default: break;
}
}
assert((inst == NULL) || (opcode(inst) == IEnd));
}
Instruction *compile (lua_State *L, Pattern *p) {
CompileState compst;
compst.p = p; compst.ncode = 0; compst.L = L;
realloccode(L, p, 2);
if (codegen(&compst, p->tree, 0, NOINST, fullset)) return NULL;
addinstruction(&compst, IEnd);
realloccode(L, p, compst.ncode);
peephole(&compst);
return p->code;
}