#include <limits.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <time.h>
#include "config.h"
#include "rplx.h"
#include "buf.h"
#include "vm.h"
#ifndef VMDEBUG
#define VMDEBUG 0
#else
#define VMDEBUG 1
#endif
#if VMDEBUG
#define Announce_doublecap(oldsize) do { \
fprintf(stderr, "*** doubling current capture array size of %d\n", (oldsize)); \
} while(0);
extern void printcode(Instruction *p);
#else
#define Announce_doublecap(oldsize)
static void printcode(Instruction *p) {UNUSED(p);}
#endif
static const Instruction giveup = {.i.code = IGiveup, .i.aux = 0};
typedef struct BTEntry {
byte_ptr s;
const Instruction *p;
int caplevel;
} BTEntry;
int sizei (const Instruction *pc) {
switch((Opcode) opcode(pc)) {
case IPartialCommit: case ITestAny: case IJmp:
case ICall: case IOpenCall: case IChoice:
case ICommit: case IBackCommit: case IOpenCapture:
case ITestChar:
return 2;
case ISet: case ISpan:
return CHARSETINSTSIZE;
case ITestSet:
return 1 + CHARSETINSTSIZE;
default:
return 1;
}
}
#include "stack.c"
STACK_OF(BTEntry, INIT_BACKTRACKSTACK)
STACK_INIT_FUNCTION(BTEntry)
STACK_FREE_FUNCTION(BTEntry)
STACK_EXPAND_FUNCTION(BTEntry, MAX_BACKTRACK)
STACK_PUSH_FUNCTION(BTEntry, RECORD_VMSTATS)
STACK_POP_FUNCTION(BTEntry)
static Capture *doublecap (Capture *cap, Capture *initial_capture, int captop) {
if (captop >= MAX_CAPLISTSIZE) return NULL;
Announce_doublecap(captop);
Capture *newc;
int newcapsize = 2 * captop;
if (newcapsize > MAX_CAPLISTSIZE) newcapsize = MAX_CAPLISTSIZE;
newc = (Capture *)malloc(newcapsize * sizeof(Capture));
memcpy(newc, cap, captop * sizeof(Capture));
if (cap != initial_capture) free(cap);
return newc;
}
static void BTEntry_stack_print (BTEntry_stack *stack, byte_ptr o, Instruction *op) {
BTEntry *top;
for (top = (stack->next - 1); top >= stack->base; top--)
fprintf(stderr,
"%ld: pos=%ld, pc %ld: %s, caplevel=%d\n",
top - stack->base,
(top->s == NULL) ? -1 : (top->s - o),
(top->p == &giveup) ? -1 : (top->p - op),
OPCODE_NAME(opcode(top->p)),
top->caplevel);
}
#define BACKREF_DEBUG 0
#if BACKREF_DEBUG
static void print_caplist(Capture *capture, int captop, Ktable *kt) {
for(int i = 0; i < captop; i++) {
if (isopencap(&capture[i])) {
printf("(%d %s ", i, ktable_element(kt, capidx(&capture[i])));
} else {
if (isclosecap(&capture[i])) {
printf("%d) ", i);
} else {
printf("** %d ** ", i);
}
}
}
printf("\n");
}
#endif
static int find_prior_capture(Capture *capture, int captop, int target_idx,
byte_ptr *s, byte_ptr *e, Ktable *kt) {
UNUSED(kt);
if (captop == 0) return 0;
#if BACKREF_DEBUG
print_caplist(capture, captop, kt);
printf("Target is [%d]%s, captop = %d\n", target_idx, ktable_element(kt, target_idx), captop);
#endif
int i;
for (i = captop - 1; i > 0; i--) {
if (!isopencap(&capture[i])) break;
}
int cap_end = i;
#if BACKREF_DEBUG
printf("declaring this to be the end of the cap list: cap_end = %d\n", cap_end);
#endif
int outer_cap = 0;
int outer_capidx = 0;
int balance = 0;
for (; i > 0; i--) {
#if BACKREF_DEBUG
printf("looking for first unclosed open, i = %d\n", i);
#endif
if (isopencap(&capture[i])) {
if (balance == 0) break;
balance += 1;
} else {
if (isclosecap(&capture[i])) {
balance -= 1;
}
}
}
outer_cap = i;
outer_capidx = capidx(&capture[i]);
#if BACKREF_DEBUG
printf("Found FIRST unclosed open at %d: [%d]%s\n", outer_cap, outer_capidx, ktable_element(kt, outer_capidx));
#endif
for (i = cap_end; i >= outer_cap; i--) {
#if BACKREF_DEBUG
printf("looking for target at i=%d\n", i);
#endif
if (isopencap(&capture[i]) && capidx(&capture[i]) == target_idx) {
#if BACKREF_DEBUG
printf("found candidate target; now determining if it is inside [%d]%s\n",
outer_cap, ktable_element(kt, outer_capidx));
#endif
balance = 0;
int j;
for (j = i - 1; j >= outer_cap; j--) {
if (isopencap(&capture[j])) {
#if BACKREF_DEBUG
printf("looking at open capture j = %d\n", j);
#endif
if ((balance >= 0) && capidx(&capture[j]) == outer_capidx) break;
balance += 1;
} else {
if (isclosecap(&capture[j])) {
balance -= 1;
}
}
}
if (j == outer_cap) {
#if BACKREF_DEBUG
printf("No other instances of outer_cap to skip over\n");
#endif
break;
}
}
}
if (i == outer_cap - 1) {
#if BACKREF_DEBUG
printf("did not find target; continuing the backwards search\n");
#endif
for (i = outer_cap; i >= 0; i--) {
if (isopencap(&capture[i]) && capidx(&capture[i]) == target_idx) break;
}
if (i < 0) return 0;
if (! (isopencap(&capture[i]) && capidx(&capture[i]) == target_idx)) {
return 0;
}
}
#if BACKREF_DEBUG
printf("FOUND open capture at i = %d, [%d]%s\n", i,
capidx(&capture[i]), ktable_element(kt, capidx(&capture[i])));
#endif
*s = capture[i].s;
i++;
int j = 0;
while (i <= captop) {
#if BACKREF_DEBUG
printf("looking at i = %d (captop = %d)\n", i, captop);
#endif
if (isclosecap(&capture[i])) {
if (j == 0) {
#if BACKREF_DEBUG
printf("i = %d: found close capture\n", i);
#endif
*e = capture[i].s;
return 1;
} else {
j--;
assert( j >= 0 );
}
} else {
assert( isopencap(&capture[i]) );
j++;
}
i++;
}
#if BACKREF_DEBUG
printf("did not find matching close!\n");
#endif
return 0;
}
#if 0#else
#define PRINT_VM_STATE UNUSED(BTEntry_stack_print)
#endif
#if (RECORD_VMSTATS)
#define INCR_STAT(action, var) if ((action)) (var)++
#define UPDATE_STAT(action, var, value) if ((action)) (var) = (value)
#define UPDATE_CAPSTATS(inst) capstats[opcode(inst)==IOpenCapture ? addr(inst) : Cclose]++
#else
#define INCR_STAT(action, var) UNUSED((stats))
#define UPDATE_STAT(action, var, value) UNUSED((stats))
#define UPDATE_CAPSTATS(inst) UNUSED(capstats)
#endif
#define PUSH_CAPLIST \
if (++captop >= capsize) { \
capture = doublecap(capture, initial_capture, captop); \
if (!capture) { \
return MATCH_ERR_CAP; \
} \
*capturebase = capture; \
capsize = 2 * captop; \
}
#define JUMPBY(delta) pc = pc + (delta)
static int vm (byte_ptr *r,
byte_ptr o, byte_ptr s, byte_ptr e,
Instruction *op, Capture **capturebase,
Stats *stats, int capstats[], Ktable *kt) {
BTEntry_stack stack;
BTEntry_stack_init(&stack);
Capture *initial_capture = *capturebase;
Capture *capture = *capturebase;
int capsize = INIT_CAPLISTSIZE;
int captop = 0;
const Instruction *pc = op;
BTEntry_stack_push(&stack, (BTEntry) {s, &giveup, 0});
for (;;) {
PRINT_VM_STATE;
INCR_STAT(stats, stats->insts);
switch (opcode(pc)) {
case ITestSet: {
assert(sizei(pc)==1+CHARSETINSTSIZE);
assert(addr(pc));
if (s < e && testchar((pc+2)->buff, (int)((byte)*s)))
JUMPBY(1+CHARSETINSTSIZE);
else JUMPBY(addr(pc));
continue;
}
case IAny: {
assert(sizei(pc)==1);
if (s < e) { JUMPBY(1); s++; }
else goto fail;
continue;
}
case IPartialCommit: {
assert(sizei(pc)==2);
assert(addr(pc));
assert(stack.next > stack.base && TOP(stack)->s != NULL);
TOP(stack)->s = s;
TOP(stack)->caplevel = captop;
JUMPBY(addr(pc));
continue;
}
case IEnd: {
assert(sizei(pc)==1);
assert(stack.next == stack.base + 1);
setcapkind(&capture[captop], Cclose);
capture[captop].s = NULL;
UPDATE_STAT(stats, stats->backtrack, stack.maxtop);
BTEntry_stack_free(&stack);
*r = s;
return MATCH_OK;
}
case IGiveup: {
assert(sizei(pc)==1);
assert(stack.next == stack.base);
UPDATE_STAT(stats, stats->backtrack, stack.maxtop);
BTEntry_stack_free(&stack);
*r = NULL;
return MATCH_OK;
}
case IRet: {
assert(sizei(pc)==1);
assert(stack.next > stack.base);
assert(TOP(stack)->s == NULL);
pc = TOP(stack)->p;
BTEntry_stack_pop(&stack);
continue;
}
case ITestAny: {
assert(sizei(pc)==2);
assert(addr(pc));
if (s < e) JUMPBY(2);
else JUMPBY(addr(pc));
continue;
}
case IChar: {
assert(sizei(pc)==1);
if (s < e && ((byte)*s == ichar(pc))) { JUMPBY(1); s++; }
else goto fail;
continue;
}
case ITestChar: {
assert(sizei(pc)==2);
assert(addr(pc));
if (s < e && ((byte)*s == ichar(pc))) JUMPBY(2);
else JUMPBY(addr(pc));
continue;
}
case ISet: {
assert(sizei(pc)==CHARSETINSTSIZE);
if (s < e && testchar((pc+1)->buff, (int)((byte)*s)))
{ JUMPBY(CHARSETINSTSIZE);
s++;
}
else { goto fail; }
continue;
}
case IBehind: {
assert(sizei(pc)==1);
int n = index(pc);
if (n > s - o) goto fail;
s -= n; JUMPBY(1);
continue;
}
case ISpan: {
assert(sizei(pc)==CHARSETINSTSIZE);
for (; s < e; s++) {
if (!testchar((pc+1)->buff, (int)((byte)*s))) break;
}
JUMPBY(CHARSETINSTSIZE);
continue;
}
case IJmp: {
assert(sizei(pc)==2);
assert(addr(pc));
JUMPBY(addr(pc));
continue;
}
case IChoice: {
assert(sizei(pc)==2);
assert(addr(pc));
if (!BTEntry_stack_push(&stack, (BTEntry) {s, pc + addr(pc), captop}))
return MATCH_ERR_STACK;
JUMPBY(2);
continue;
}
case ICall: {
assert(sizei(pc)==2);
assert(addr(pc));
if (!BTEntry_stack_push(&stack, (BTEntry) {NULL, pc + 2, 0}))
return MATCH_ERR_STACK;
JUMPBY(addr(pc));
continue;
}
case ICommit: {
assert(sizei(pc)==2);
assert(addr(pc));
assert(stack.next > stack.base && TOP(stack)->s != NULL);
BTEntry_stack_pop(&stack);
JUMPBY(addr(pc));
continue;
}
case IBackCommit: {
assert(sizei(pc)==2);
assert(addr(pc));
assert(stack.next > stack.base && TOP(stack)->s != NULL);
s = TOP(stack)->s;
captop = TOP(stack)->caplevel;
BTEntry_stack_pop(&stack);
JUMPBY(addr(pc));
continue;
}
case IFailTwice:
assert(stack.next > stack.base);
BTEntry_stack_pop(&stack);
case IFail:
assert(sizei(pc)==1);
fail: {
do {
assert(stack.next > stack.base);
s = TOP(stack)->s;
BTEntry_stack_pop(&stack);
} while (s == NULL);
captop = PEEK(stack, 1)->caplevel;
pc = PEEK(stack, 1)->p;
continue;
}
case IBackref: {
assert(sizei(pc)==1);
byte_ptr startptr = NULL;
byte_ptr endptr = NULL;
int target = index(pc);
int have_prior = find_prior_capture(capture, captop, target, &startptr, &endptr, kt);
if (have_prior) {
assert( startptr && endptr );
assert( endptr >= startptr );
size_t prior_len = endptr - startptr;
if ( ((size_t)(e - s) >= prior_len) && (memcmp(s, startptr, prior_len) == 0) ) {
s += prior_len;
JUMPBY(1);
continue;
}
}
goto fail;
}
case ICloseConstCapture: {
assert(sizei(pc)==1);
assert(index(pc));
assert(captop > 0);
capture[captop].s = s;
setcapidx(&capture[captop], index(pc));
setcapkind(&capture[captop], Ccloseconst);
goto pushcapture;
}
case ICloseCapture: {
assert(sizei(pc)==1);
assert(captop > 0);
capture[captop].s = s;
setcapkind(&capture[captop], Cclose);
pushcapture:
UPDATE_CAPSTATS(pc);
PUSH_CAPLIST;
UPDATE_STAT(stats, stats->caplist, captop);
JUMPBY(1);
continue;
}
case IOpenCapture: {
assert(sizei(pc)==2);
capture[captop].s = s;
setcapidx(&capture[captop], index(pc));
setcapkind(&capture[captop], addr(pc));
UPDATE_CAPSTATS(pc);
PUSH_CAPLIST;
UPDATE_STAT(stats, stats->caplist, captop);
JUMPBY(2);
continue;
}
case IHalt: {
assert(sizei(pc)==1);
setcapkind(&capture[captop], Cfinal);
capture[captop].s = s;
*r = s;
UPDATE_STAT(stats, stats->backtrack, stack.maxtop);
BTEntry_stack_free(&stack);
return MATCH_OK;
}
default: {
if (VMDEBUG) {
fprintf(stderr, "Illegal opcode at %d: %d\n", (int) (pc - op), opcode(pc));
printcode(op);
}
assert(0);
BTEntry_stack_free(&stack);
return MATCH_ERR_BADINST;
} }
}
}
typedef struct Cap {
byte_ptr start;
int count;
} Cap;
STACK_OF(Cap, INIT_CAPDEPTH)
STACK_INIT_FUNCTION(Cap)
STACK_FREE_FUNCTION(Cap)
STACK_EXPAND_FUNCTION(Cap, MAX_CAPDEPTH)
STACK_PUSH_FUNCTION(Cap, RECORD_VMSTATS)
STACK_POP_FUNCTION(Cap)
#define capstart(cs) (capkind((cs)->cap)==Crosieconst ? NULL : (cs)->cap->s)
static int caploop (CapState *cs, Encoder encode, Buffer *buf, unsigned int *max_capdepth) {
int err;
byte_ptr start;
int count = 0;
Cap_stack stack;
Cap_stack_init(&stack);
if (!Cap_stack_push(&stack, (Cap) {capstart(cs), 0})) {
Cap_stack_free(&stack);
return MATCH_STACK_ERROR;
}
err = encode.Open(cs, buf, 0);
if (err) { Cap_stack_free(&stack); return err; }
cs->cap++;
while (STACK_SIZE(stack) > 0) {
while (isopencap(cs->cap)) {
if (!Cap_stack_push(&stack, (Cap) {capstart(cs), count})) {
Cap_stack_free(&stack);
return MATCH_STACK_ERROR;
}
err = encode.Open(cs, buf, count);
if (err) { Cap_stack_free(&stack); return err; }
count = 0;
cs->cap++;
}
count = TOP(stack)->count;
start = TOP(stack)->start;
Cap_stack_pop(&stack);
if (isfinalcap(cs->cap)) {
Capture synthetic;
synthetic.s = cs->cap->s;
setcapidx(&synthetic, 0);
setcapkind(&synthetic, Cclose);
cs->cap = &synthetic;
while (1) {
err = encode.Close(cs, buf, count, start);
if (err) { Cap_stack_free(&stack); return err; }
if (STACK_SIZE(stack)==0) break;
Cap_stack_pop(&stack);
count = TOP(stack)->count;
start = TOP(stack)->start;
}
*max_capdepth = stack.maxtop;
Cap_stack_free(&stack);
return MATCH_HALT;
}
assert(!isopencap(cs->cap));
err = encode.Close(cs, buf, count, start);
if (err) { Cap_stack_free(&stack); return err; }
cs->cap++;
count++;
}
*max_capdepth = stack.maxtop;
Cap_stack_free(&stack);
return MATCH_OK;
}
static int walk_captures (Capture *capture, byte_ptr s,
Ktable *kt, Encoder encode,
Buffer *buf, int *abend, Stats *stats) {
int err;
*abend = 0;
if (isfinalcap(capture)) {
*abend = 1;
goto done;
}
if (!isclosecap(capture)) {
CapState cs;
cs.ocap = cs.cap = capture;
cs.s = s;
cs.kt = kt;
unsigned int max_capdepth = 0;
err = caploop(&cs, encode, buf, &max_capdepth);
UPDATE_STAT(stats, stats->capdepth, max_capdepth);
if (err == MATCH_HALT) {
*abend = 1;
goto done;
}
else
if (err) return err;
}
done:
return MATCH_OK;
}
int vm_match2 (
Chunk *chunk,
struct rosie_string *input, uint32_t startpos, uint32_t endpos,
Encoder encode,
uint8_t collect_times,
Buffer *output,
struct rosie_matchresult *match_result) {
Capture initial_capture[INIT_CAPLISTSIZE];
Capture *capture = initial_capture;
int err, abend;
int t0 = 0, tmatch = 0;
byte_ptr r;
Stats stats;
int capstats[256] = {0};
if (input->len > UINT_MAX) return MATCH_ERR_INPUT_LEN;
if (startpos != 0) startpos--;
if (endpos == 0) endpos = input->len;
else endpos--;
if (startpos > input->len) return MATCH_ERR_STARTPOS;
if (endpos < startpos) return MATCH_ERR_ENDPOS;
stats = (Stats) {match_result->ttotal, match_result->tmatch, 0, 0, 0, 0};
if (collect_times) t0 = clock();
err = vm(&r, input->ptr, input->ptr + startpos, input->ptr + endpos,
chunk->code,
&capture,
collect_times ? &stats : NULL,
capstats,
chunk->ktable);
#if (VMDEBUG)
fprintf(stderr, "*** vm() completed with err code %d, r as position = %ld\n",
err, r ? r - input_ptr : 0);
if (stats) fprintf(stderr, "*** vm executed %d instructions\n", stats->insts);
fprintf(stderr, "*** capstats from vm: Close %d, Rosiecap %d\n",
capstats[Cclose], capstats[Crosiecap]);
for(int ii=0; ii<256; ii++)
if (!(ii==Cclose || ii==Crosiecap || ii==Crosieconst || ii==Cbackref))
assert(capstats[ii]==0);
#endif
if (err != MATCH_OK) goto done;
if (collect_times) {
tmatch = clock();
match_result->tmatch += tmatch - t0;
}
if (r == NULL) {
match_result->data.ptr = NULL;
match_result->data.len = 0;
match_result->leftover = endpos - startpos;
match_result->abend = 0;
if (collect_times) match_result->ttotal += tmatch - t0;
goto done;
}
if (encode.Open) {
err = walk_captures(capture, input->ptr, chunk->ktable, encode,
output, &abend, &stats);
if (err != MATCH_OK) goto done;
match_result->data.ptr = output->data;
match_result->data.len = output->n;
} else {
match_result->data.ptr = NULL;
match_result->data.len = MATCH_WITHOUT_DATA;
abend = 0;
}
if (collect_times) match_result->ttotal += clock() - t0;
match_result->leftover = endpos - (r - input->ptr);
match_result->abend = abend;
done:
if (capture != initial_capture) free(capture);
return err;
}