#if defined(__linux__) && !defined(_GNU_SOURCE)
#define _GNU_SOURCE
#endif
#include <stdlib.h>
#include <alloca.h>
#include <ctype.h>
#include <limits.h>
#include <string.h>
#include <time.h>
#include "lua.h"
#include "lauxlib.h"
#include "lptypes.h"
#include "lpcap.h"
#include "lpcode.h"
#include "lpprint.h"
#include "file.h"
#include "lptree.h"
#include "str.h"
#include "rbuf.h"
#include "rpeg.h"
#include "ktable.h"
#include "ktable-macros.h"
#include "json.h"
#if !defined(DEBUG)
#define DEBUG 0
#endif
static const char *strerror_error(const char *filename, int line, int code) {
char *error_message;
if (!asprintf(&error_message, "%s:%d: INVALID ERROR CODE %d", filename, line, code))
return "ERROR: asprintf failed";
return error_message;
}
#define MESSAGES_LEN(array) ((int) ((sizeof (array) / sizeof (const char *))))
#define STRERROR(code, message_list) \
( (((code) > 0) && ((code) < MESSAGES_LEN(message_list))) \
? (message_list)[(code)] \
: strerror_error(__FILE__, __LINE__, code) )
static TTree *newgrammar (lua_State *L, int arg);
static const char *val2str (lua_State *L, int idx) {
const char *k = lua_tostring(L, idx);
if (k != NULL)
return lua_pushfstring(L, "%s", k);
else
return lua_pushfstring(L, "(a %s)", luaL_typename(L, idx));
}
static void fixonecall (lua_State *L, int postable, TTree *g, TTree *t, Ktable *kt) {
int n;
size_t len;
const char *rulename = ktable_element_name(kt, t->key, &len);
lua_pushlstring(L, rulename, len);
lua_gettable(L, postable);
n = lua_tonumber(L, -1);
lua_pop(L, 1);
if (n == 0) {
luaL_error(L, "rule '%s' undefined in given grammar", rulename);
}
t->tag = TCall;
t->u.ps = n - (t - g);
assert(sib2(t)->tag == TRule);
sib2(t)->key = t->key;
}
static void correctassociativity (TTree *tree) {
TTree *t1 = sib1(tree);
assert(tree->tag == TChoice || tree->tag == TSeq);
while (t1->tag == tree->tag) {
int n1size = tree->u.ps - 1;
int n11size = t1->u.ps - 1;
int n12size = n1size - n11size - 1;
memmove(sib1(tree), sib1(t1), n11size * sizeof(TTree));
tree->u.ps = n11size + 1;
sib2(tree)->tag = tree->tag;
sib2(tree)->u.ps = n12size + 1;
}
}
#if 0#endif
static void finalfix (lua_State *L, int postable, TTree *g, TTree *t, Ktable *kt) {
tailcall:
switch (t->tag) {
case TGrammar:
return;
case TBackref:
return;
case TOpenCall: {
if (g != NULL)
fixonecall(L, postable, g, t, kt);
else {
luaL_error(L, "rule '%s' used outside a grammar", ktable_element(kt, t->key));
}
break;
}
case TSeq: case TChoice:
correctassociativity(t);
break;
case TNoTree:
luaL_error(L, "cannot compile expression containing a precompiled pattern");
break;
}
switch (numsiblings[t->tag]) {
case 1:
t = sib1(t); goto tailcall;
case 2:
finalfix(L, postable, g, sib1(t), kt);
t = sib2(t); goto tailcall;
default: assert(numsiblings[t->tag] == 0); break;
}
}
static Pattern *getpattern (lua_State *L, int idx) {
Pattern *p = (Pattern *)luaL_checkudata(L, idx, PATTERN_T);
assert(p->kt);
return p;
}
void *extract_pattern (lua_State *L, int idx) {
void *p = luaL_checkudata(L, idx, PATTERN_T);
return p;
}
static int getsize (lua_State *L, int idx) {
return (lua_rawlen(L, idx) - sizeof(Pattern)) / sizeof(TTree) + 1;
}
static TTree *gettree (lua_State *L, int idx, int *len) {
Pattern *p = getpattern(L, idx);
if (len)
*len = getsize(L, idx);
return p->tree;
}
static void newktable (lua_State *L, int n) {
pushnewktable(L, n);
setktable(L, -2);
}
static int addtoktable (lua_State *L, int idx) {
if (lua_isnil(L, idx))
return 0;
else
if (lua_isstring(L, idx)) {
LOGf("adding '%s'\n", lua_tostring(L, idx));
}
int n;
getktable(L, -1);
Ktable *kt = lua_touserdata(L, -1);
lua_pop(L, 1);
n = ktable_len(kt);
assert( n >= 0 && n <= KTABLE_INDEX_T_MAX );
if (n >= KTABLE_INDEX_T_MAX)
return luaL_error(L, "(add) too many captures in pattern: %d", n);
else if (!lua_isstring(L, idx))
return luaL_error(L, "(add) ktable entry is not a string: %s", lua_tostring(L, idx));
else {
size_t len;
const char *s = lua_tolstring(L, idx, &len);
assert (s);
if (!ktable_add(kt, (const char *)s, len))
luaL_error(L, "(add) out of memory");
return n+1;
}
}
static int concattable (lua_State *L, int idx1, int idx2) {
if (!isktable(L, idx1) || !isktable(L, idx2))
luaL_error(L, "in concattable, did not find ktables");
assert( (idx1 < 0) && (idx2 < 0) );
Ktable *kt1 = lua_touserdata(L, idx1);
Ktable *kt2 = lua_touserdata(L, idx2);
assert( kt1 != kt2 );
int n = 0;
int err = ktable_concat(kt1, kt2, &n);
if (err) {
const char *msg = KTABLE_MESSAGES[err];
luaL_error(L, msg);
}
return n;
}
static void correctkeys (TTree *tree, int n) {
if (n == 0) return;
tailcall:
assert(tree != NULL);
switch (tree->tag) {
case TOpenCall: case TCall: case TRunTime: case TRule: case TBackref: {
if (tree->key > 0)
tree->key += n;
break;
}
case TCapture: {
if (tree->key > 0)
tree->key += n;
if (tree->u.n > 0)
tree->u.n += n;
break;
}
default: break;
}
switch (numsiblings[tree->tag]) {
case 1:
tree = sib1(tree); goto tailcall;
case 2:
correctkeys(sib1(tree), n);
tree = sib2(tree); goto tailcall;
default: assert(numsiblings[tree->tag] == 0); break;
}
}
static void joinktables (lua_State *L, int p1, TTree *t2, int p2) {
int n1, n2;
assert( (p1 > 0) && (p2 > 0) );
getktable(L, p1);
getktable(L, p2);
assert( lua_touserdata(L, -1) != NULL );
assert( lua_touserdata(L, -2) != NULL );
Ktable *k2 = lua_touserdata(L, -1);
Ktable *k1 = lua_touserdata(L, -2);
n1 = ktable_len(k1);
n2 = ktable_len(k2);
pushnewktable(L, n1 + n2);
int n = concattable(L, -3, -1);
UNUSED(n);
assert( n >= 0 );
if (k1 != k2) {
n = concattable(L, -2, -1);
assert( n >= 0 );
}
Ktable *newkt = lua_touserdata(L, -1); UNUSED(newkt);
assert( ktable_len(newkt) == ((k1 == k2) ? n1 : (n1 + n2)) );
setktable(L, -4);
lua_pop(L, 2);
if (k1 != k2) {
correctkeys(t2, n1);
}
}
static void copyktable (lua_State *L, int idx) {
assert( idx > 0 );
getktable(L, idx);
pushnewktable(L, ktable_len((Ktable *)lua_touserdata(L, -1)));
concattable(L, -2, -1);
setktable(L, -3);
lua_pop(L, 1);
}
static void mergektable (lua_State *L, int idx, TTree *stree) {
int n;
assert( idx > 0 );
getktable(L, -1);
getktable(L, idx);
assert( lua_islightuserdata(L, -1) && lua_islightuserdata(L, -2) );
assert( lua_touserdata(L, -1) != lua_touserdata(L, -2) );
n = concattable(L, -1, -2);
if (n < 0) {
LOGf("concattable result is error code %d\n", n);
const char *msg = KTABLE_MESSAGES[-n];
luaL_error(L, msg);
}
lua_pop(L, 2);
correctkeys(stree, n);
}
static int addtonewktable (lua_State *L, int p, int idx) {
newktable(L, 1);
if (p)
mergektable(L, p, NULL);
return addtoktable(L, idx);
}
static int testpattern (lua_State *L, int idx) {
if (lua_touserdata(L, idx)) {
if (lua_getmetatable(L, idx)) {
luaL_getmetatable(L, PATTERN_T);
if (lua_rawequal(L, -1, -2)) {
lua_pop(L, 2);
return 1;
}
}
}
return 0;
}
static TTree *newtree (lua_State *L, int len) {
size_t size = (len - 1) * sizeof(TTree) + sizeof(Pattern);
Pattern *p = (Pattern *)lua_newuserdata(L, size);
luaL_getmetatable(L, PATTERN_T);
lua_pushvalue(L, -1);
lua_setuservalue(L, -3);
lua_setmetatable(L, -2);
p->code = NULL; p->codesize = 0;
p->kt = ktable_new(0, 0);
return p->tree;
}
static TTree *newleaf (lua_State *L, int tag) {
TTree *tree = newtree(L, 1);
tree->tag = tag;
tree->key = 0;
return tree;
}
static TTree *newcharset (lua_State *L) {
TTree *tree = newtree(L, bytes2slots(CHARSETSIZE) + 1);
tree->tag = TSet;
tree->key = 0;
loopset(i, treebuffer(tree)[i] = 0);
return tree;
}
static TTree *seqaux (TTree *tree, TTree *sib, int sibsize) {
tree->tag = TSeq; tree->u.ps = sibsize + 1;
memcpy(sib1(tree), sib, sibsize * sizeof(TTree));
return sib2(tree);
}
static void fillseq (TTree *tree, int tag, int n, const char *s) {
int i;
assert( (tag==TAny) || (tag==TChar) );
for (i = 0; i < n - 1; i++) {
tree->key = 0;
tree->tag = TSeq; tree->u.ps = 2;
sib1(tree)->tag = tag;
sib1(tree)->u.n = s ? (byte)s[i] : 0;
tree = sib2(tree);
}
tree->tag = tag;
tree->u.n = s ? (byte)s[i] : 0;
tree->key = 0;
}
static TTree *numtree (lua_State *L, int n) {
if (n == 0)
return newleaf(L, TTrue);
else {
TTree *tree, *nd;
if (n > 0)
tree = nd = newtree(L, 2 * n - 1);
else {
n = -n;
tree = newtree(L, 2 * n);
tree->tag = TNot;
nd = sib1(tree);
}
fillseq(nd, TAny, n, NULL);
return tree;
}
}
static TTree *getpatt (lua_State *L, int idx, int *len) {
TTree *tree;
switch (lua_type(L, idx)) {
case LUA_TSTRING: {
size_t slen;
const char *s = lua_tolstring(L, idx, &slen);
if (slen == 0)
tree = newleaf(L, TTrue);
else {
tree = newtree(L, 2 * (slen - 1) + 1);
fillseq(tree, TChar, slen, s);
}
break;
}
case LUA_TNUMBER: {
int n = lua_tointeger(L, idx);
tree = numtree(L, n);
break;
}
case LUA_TBOOLEAN: {
tree = (lua_toboolean(L, idx) ? newleaf(L, TTrue) : newleaf(L, TFalse));
break;
}
case LUA_TTABLE: {
tree = newgrammar(L, idx);
break;
}
default: {
return gettree(L, idx, len);
}
}
lua_replace(L, idx);
if (len)
*len = getsize(L, idx);
return tree;
}
static TTree *newroot1sib (lua_State *L, int tag) {
int s1;
TTree *tree1 = getpatt(L, 1, &s1);
TTree *tree = newtree(L, 1 + s1);
tree->tag = tag;
tree->key = 0;
memcpy(sib1(tree), tree1, s1 * sizeof(TTree));
copyktable(L, 1);
return tree;
}
static TTree *newroot2sib (lua_State *L, int tag) {
int s1, s2;
TTree *tree1 = getpatt(L, 1, &s1);
TTree *tree2 = getpatt(L, 2, &s2);
getktable(L, 1);
getktable(L, 2);
Ktable *kt1 = lua_touserdata(L, -1); UNUSED(kt1);
Ktable *kt2 = lua_touserdata(L, -2); UNUSED(kt2);
lua_pop(L, 2);
assert( kt1 != NULL );
assert( kt2 != NULL );
TTree *tree = newtree(L, 1 + s1 + s2);
tree->tag = tag;
tree->u.ps = 1 + s1;
memcpy(sib1(tree), tree1, s1 * sizeof(TTree));
memcpy(sib2(tree), tree2, s2 * sizeof(TTree));
joinktables(L, 1, sib2(tree), 2);
return tree;
}
static int lp_P (lua_State *L) {
luaL_checkany(L, 1);
getpatt(L, 1, NULL);
lua_settop(L, 1);
return 1;
}
static int lp_halt (lua_State *L) {
newleaf(L, THalt);
return 1;
}
static int lp_seq (lua_State *L) {
TTree *tree1 = getpatt(L, 1, NULL);
TTree *tree2 = getpatt(L, 2, NULL);
if (tree1->tag == TFalse || tree2->tag == TTrue)
lua_pushvalue(L, 1);
else if (tree1->tag == TTrue)
lua_pushvalue(L, 2);
else
newroot2sib(L, TSeq);
return 1;
}
static int lp_choice (lua_State *L) {
Charset st1, st2;
TTree *t1 = getpatt(L, 1, NULL);
TTree *t2 = getpatt(L, 2, NULL);
if (tocharset(t1, &st1) && tocharset(t2, &st2)) {
TTree *t = newcharset(L);
loopset(i, treebuffer(t)[i] = st1.cs[i] | st2.cs[i]);
}
else if (nofail(t1) || t2->tag == TFalse)
lua_pushvalue(L, 1);
else if (t1->tag == TFalse)
lua_pushvalue(L, 2);
else
newroot2sib(L, TChoice);
return 1;
}
static int lp_star (lua_State *L) {
int size1;
int n = (int)luaL_checkinteger(L, 2);
TTree *tree1 = getpatt(L, 1, &size1);
if (n >= 0) {
TTree *tree = newtree(L, (n + 1) * (size1 + 1));
if (nullable(tree1))
luaL_error(L, "loop body may accept empty string");
while (n--)
tree = seqaux(tree, tree1, size1);
tree->tag = TRep;
memcpy(sib1(tree), tree1, size1 * sizeof(TTree));
}
else {
TTree *tree;
n = -n;
tree = newtree(L, n * (size1 + 3) - 1);
for (; n > 1; n--) {
tree->tag = TChoice; tree->u.ps = n * (size1 + 3) - 2;
sib2(tree)->tag = TTrue;
tree = sib1(tree);
tree = seqaux(tree, tree1, size1);
}
tree->tag = TChoice; tree->u.ps = size1 + 1;
sib2(tree)->tag = TTrue;
memcpy(sib1(tree), tree1, size1 * sizeof(TTree));
}
copyktable(L, 1);
return 1;
}
static int lp_and (lua_State *L) {
newroot1sib(L, TAnd);
return 1;
}
static int lp_not (lua_State *L) {
newroot1sib(L, TNot);
return 1;
}
static int lp_sub (lua_State *L) {
Charset st1, st2;
int s1, s2;
TTree *t1 = getpatt(L, 1, &s1);
TTree *t2 = getpatt(L, 2, &s2);
if (tocharset(t1, &st1) && tocharset(t2, &st2)) {
TTree *t = newcharset(L);
loopset(i, treebuffer(t)[i] = st1.cs[i] & ~st2.cs[i]);
}
else {
TTree *tree = newtree(L, 2 + s1 + s2);
tree->tag = TSeq;
tree->u.ps = 2 + s2;
sib1(tree)->tag = TNot;
memcpy(sib1(sib1(tree)), t2, s2 * sizeof(TTree));
memcpy(sib2(tree), t1, s1 * sizeof(TTree));
joinktables(L, 1, sib1(tree), 2);
}
return 1;
}
static int lp_set (lua_State *L) {
size_t l;
const char *s = luaL_checklstring(L, 1, &l);
TTree *tree = newcharset(L);
while (l--) {
setchar(treebuffer(tree), (byte)(*s));
s++;
}
return 1;
}
static int lp_range (lua_State *L) {
int arg;
int top = lua_gettop(L);
TTree *tree = newcharset(L);
for (arg = 1; arg <= top; arg++) {
int c;
size_t l;
const char *r = luaL_checklstring(L, arg, &l);
luaL_argcheck(L, l == 2, arg, "range must have two characters");
for (c = (byte)r[0]; c <= (byte)r[1]; c++)
setchar(treebuffer(tree), c);
}
return 1;
}
static int lp_behind (lua_State *L) {
TTree *tree;
TTree *tree1 = getpatt(L, 1, NULL);
int n = fixedlen(tree1);
luaL_argcheck(L, n >= 0, 1, "pattern may not have fixed length");
luaL_argcheck(L, !hascaptures(tree1), 1, "pattern have captures");
luaL_argcheck(L, n <= MAXBEHIND, 1, "pattern too long to look behind");
tree = newroot1sib(L, TBehind);
tree->u.n = n;
return 1;
}
static int lp_V (lua_State *L) {
TTree *tree = newleaf(L, TOpenCall);
luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected");
tree->key = addtonewktable(L, 0, 1);
LOGf("just after call to addtonewktable, tree->key is %d\n", tree->key);
return 1;
}
static int capture_aux (lua_State *L, int cap, int labelidx) {
TTree *tree = newroot1sib(L, TCapture);
tree->cap = cap;
tree->key = (labelidx == 0) ? 0 : addtonewktable(L, 1, labelidx);
tree->u.ps = 0;
LOGf("just after call to addtonewktable, tree->key is %d\n", tree->key);
return 1;
}
static TTree *newemptycapkey (lua_State *L, int cap, int idx) {
TTree *tree = newtree(L, 2);
tree->tag = TCapture;
tree->cap = cap;
sib1(tree)->tag = TTrue;
tree->key = addtonewktable(L, 0, idx);
tree->u.n = 0;
LOG("just after call to addtonewktable\n");
return tree;
}
static TTree *newemptycapkey2 (lua_State *L, int cap, int idx, int idx2) {
TTree *tree = newemptycapkey(L, cap, idx);
tree->u.n = addtoktable(L, idx2);
return tree;
}
static Instruction *prepcompile (lua_State *L, Pattern *p);
static int r_codegen_if_needed (lua_State *L) {
Pattern *p = getpattern(L, 1);
if (p->code == NULL)
if (!prepcompile(L, p)) lua_pushinteger(L, p->codesize);
else lua_pushnil(L);
else lua_pushinteger(L, p->codesize);
return 1;
}
static int r_pattern_size (lua_State *L) {
Pattern *p = getpattern(L, 1);
size_t without_code = lua_rawlen(L, 1);
size_t codesize = p->codesize * sizeof(Instruction);
lua_pushinteger(L, without_code + codesize);
return 1;
}
static int r_userdata_size (lua_State *L) {
luaL_checktype(L, 1, LUA_TUSERDATA);
lua_pushinteger(L, lua_rawlen(L, 1));
return 1;
}
static int r_capture (lua_State *L) {
size_t len;
luaL_checklstring(L, 2, &len);
if (len == 0)
luaL_error(L, "capture name cannot be the empty string");
else {
if (len > KTABLE_MAX_ELEMENT_LEN) luaL_error(L, "capture name too long");
}
int result = capture_aux(L, Crosiecap, 2);
return result;
}
static int r_constcapture (lua_State *L) {
size_t len;
luaL_checklstring(L, 1, &len);
if (len > SHRT_MAX) luaL_error(L, "constant capture string too long");
luaL_checklstring(L, 2, &len);
if (len > SHRT_MAX) luaL_error(L, "capture name too long");
newemptycapkey2(L, Crosieconst, 2, 1);
return 1;
}
static int r_backref (lua_State *L) {
size_t len;
getpattern(L, 1);
luaL_checklstring(L, 2, &len);
if (len == 0) luaL_error(L, "capture name cannot be the empty string");
else {
if (len > KTABLE_MAX_ELEMENT_LEN) luaL_error(L, "capture name too long");
}
TTree *tree;
tree = newroot1sib(L, TBackref);
tree->key = addtonewktable(L, 0, 2);
return 1;
}
static void getfirstrule (lua_State *L, int arg, int postab) {
LOGf("*** getfirstrule(grammar table at stack position %d, postable at %d)\n", arg, postab);
int top;
(void)(top);
if (DEBUG) { top = lua_gettop(L); }
lua_rawgeti(L, arg, 1);
if (lua_isstring(L, -1)) {
lua_pushvalue(L, -1);
lua_gettable(L, arg);
}
else {
lua_pushinteger(L, 1);
lua_insert(L, -2);
}
if (!testpattern(L, -1)) {
if (lua_isnil(L, -1))
luaL_error(L, "grammar has no initial rule");
else
luaL_error(L, "initial rule '%s' is not a pattern", lua_tostring(L, -2));
}
lua_pushvalue(L, -2);
lua_pushinteger(L, 1);
lua_settable(L, postab);
if (DEBUG) assert( (lua_gettop(L) - top) == 2 );
}
static int collectrules (lua_State *L, int arg, int *totalsize) {
LOGf("*** collectrules(grammar table at stack position %d, -)\n", arg);
int n = 1;
int postab = lua_gettop(L) + 1;
int size;
lua_newtable(L);
getfirstrule(L, arg, postab);
size = 2 + getsize(L, postab + 2);
lua_pushnil(L);
while (lua_next(L, arg) != 0) {
if (lua_tonumber(L, -2) == 1 ||
lp_equal(L, -2, postab + 1)) {
lua_pop(L, 1);
continue;
}
if (!testpattern(L, -1))
luaL_error(L, "rule '%s' is not a pattern", val2str(L, -2));
luaL_checkstack(L, LUA_MINSTACK, "grammar has too many rules");
lua_pushvalue(L, -2);
lua_pushinteger(L, size);
lua_settable(L, postab);
size += 1 + getsize(L, -1);
lua_pushvalue(L, -2);
n++;
}
*totalsize = size + 1;
return n;
}
static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) {
int i;
TTree *nd = sib1(grammar);
for (i = 0; i < n; i++) {
int ridx = frule + 2*i + 1;
int rulesize;
TTree *rn = gettree(L, ridx, &rulesize);
nd->tag = TRule;
nd->key = 0;
nd->cap = i;
nd->u.ps = rulesize + 1;
memcpy(sib1(nd), rn, rulesize * sizeof(TTree));
mergektable(L, ridx, sib1(nd));
nd = sib2(nd);
}
nd->tag = TTrue;
}
static int checkloops (TTree *tree) {
tailcall:
if (tree->tag == TRep && nullable(sib1(tree)))
return 1;
else if (tree->tag == TGrammar)
return 0;
else {
switch (numsiblings[tree->tag]) {
case 1:
tree = sib1(tree); goto tailcall;
case 2:
if (checkloops(sib1(tree))) return 1;
tree = sib2(tree); goto tailcall;
default: assert(numsiblings[tree->tag] == 0); return 0;
}
}
}
static int verifyerror (lua_State *L, int *passed, int npassed) {
if (!isktable(L, -1)) luaL_error(L, "%s:did not find ktable at top of stack", __func__);
int i, j;
for (i = npassed - 1; i >= 0; i--) {
for (j = i - 1; j >= 0; j--) {
if (passed[i] == passed[j]) {
ktable_get(L, -1, passed[i]);
return luaL_error(L, "rule '%s' may be left recursive", val2str(L, -1));
}
}
}
return luaL_error(L, "too many left calls in grammar");
}
static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed,
int nb) {
tailcall:
if (!isktable(L, -1)) luaL_error(L, "%s:did not find ktable at top of stack", __func__);
switch (tree->tag) {
case TChar: case TSet: case TAny:
case TFalse: case THalt:
return nb;
case TTrue:
case TBehind:
return 1;
case TNot: case TAnd: case TRep:
tree = sib1(tree); nb = 1; goto tailcall;
case TCapture: case TRunTime:
tree = sib1(tree); goto tailcall;
case TCall:
tree = sib2(tree); goto tailcall;
case TSeq:
if (!verifyrule(L, sib1(tree), passed, npassed, 0))
return nb;
tree = sib2(tree); goto tailcall;
case TChoice:
nb = verifyrule(L, sib1(tree), passed, npassed, nb);
tree = sib2(tree); goto tailcall;
case TRule:
if (npassed >= MAXRULES)
return verifyerror(L, passed, npassed);
else {
passed[npassed++] = tree->key;
tree = sib1(tree); goto tailcall;
}
case TGrammar:
return nullable(tree);
default: assert(0); return 0;
}
}
static void verifygrammar (lua_State *L, TTree *grammar) {
if (!isktable(L, -1)) luaL_error(L, "%s:did not find ktable at top of stack", __func__);
int passed[MAXRULES];
TTree *rule;
for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
if (rule->key == 0) continue;
if (DEBUG) {
ktable_get(L, -1, rule->key);
assert( lua_isstring(L, -1) );
lua_pop(L, 1);
}
verifyrule(L, sib1(rule), passed, 0, 0);
}
assert(rule->tag == TTrue);
for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
if (rule->key == 0) continue;
if (checkloops(sib1(rule))) {
ktable_get(L, -1, rule->key);
luaL_error(L, "empty loop in rule '%s'", val2str(L, -1));
}
}
assert(rule->tag == TTrue);
}
static void initialrulename (lua_State *L, TTree *grammar, int frule) {
if (sib1(grammar)->key == 0) {
Ktable *kt = lua_touserdata(L, -1);
int n = ktable_len(kt) + 1;
LOGf("initial rule not referenced, and n = %d\n", n);
lua_pushvalue(L, frule);
assert( lua_isstring(L, -1) );
size_t name_len;
const char *name = lua_tolstring(L, -1, &name_len);
int actualpos = ktable_add(kt, name, name_len);
if (!actualpos)
luaL_error(L, "(initial rule) out of memory");
lua_pop(L, 1);
assert( actualpos == n );
sib1(grammar)->key = n;
}
}
static TTree *newgrammar (lua_State *L, int arg) {
int treesize;
int frule = lua_gettop(L) + 2;
int n = collectrules(L, arg, &treesize);
TTree *g = newtree(L, treesize);
luaL_argcheck(L, n <= MAXRULES, arg, "grammar has too many rules");
g->tag = TGrammar; g->u.n = n;
pushnewktable(L, n);
Ktable *kt = (Ktable *)lua_touserdata(L, -1);
setktable(L, -2);
buildgrammar(L, g, frule, n);
finalfix(L, frule - 1, g, sib1(g), kt);
getktable(L, -1);
initialrulename(L, g, frule);
verifygrammar(L, g);
lua_pop(L, 1);
lua_insert(L, -(n * 2 + 2));
lua_pop(L, n * 2 + 1);
return g;
}
#if defined(DEBUG)
#define CHECK_KEY(k) do { \
if (((k) < 0) || ((k) > keymax)) { \
printf("tree->key %d < 1 or exceeds max (%d)\n", (k), keymax); \
fflush(stdout); \
} \
} while(0);
#else
#define CHECK_KEY(k)
#endif
static void update_capture_keys(TTree *tree, int *mapping, int keymax) {
int i;
switch (tree->tag) {
case TCapture: {
CHECK_KEY(tree->key);
tree->key = mapping[tree->key];
if (tree->cap == Crosieconst) {
CHECK_KEY(tree->u.n);
tree->u.n = mapping[tree->u.n];
assert(sib1(tree)->tag == TTrue);
} else {
update_capture_keys(sib1(tree), mapping, keymax);
}
break;
}
case TBackref: {
CHECK_KEY(tree->key);
tree->key = mapping[tree->key];
break;
}
case TRule: {
CHECK_KEY(tree->key);
tree->key = mapping[tree->key];
update_capture_keys(sib1(tree), mapping, keymax);
break;
}
case TGrammar: {
TTree *rule = sib1(tree);
for (i = 0; i < tree->u.n; i++) {
update_capture_keys(rule, mapping, keymax);
rule = sib2(rule);
}
assert(rule->tag == TTrue);
break;
}
case TNoTree: {
break;
}
default: {
int sibs = numsiblings[tree->tag];
if (sibs >= 1) {
update_capture_keys(sib1(tree), mapping, keymax);
if (sibs >= 2)
update_capture_keys(sib2(tree), mapping, keymax);
}
break;
}
}
}
static void compact_ktable(lua_State *L, Pattern *p) {
int idx, newidx;
Ktable *ckt = ktable_compact(p->kt);
if (!ckt) luaL_error(L, "%s:%d: could not compact ktable\n", __FILE__, __LINE__);
#if 0#endif
int *mapping = (int *)calloc(ktable_len(p->kt)+1, sizeof(int));
if (!mapping) luaL_error(L, "%s:%d: out of memory (while compacting ktable)\n", __FILE__, __LINE__);
mapping[0] = 0;
for (idx = 1; idx <= ktable_len(p->kt); idx++) {
size_t len;
const char *name = ktable_element_name(p->kt, idx, &len);
newidx = ktable_compact_search(ckt, name, len);
if (newidx == 0) {
free(mapping);
luaL_error(L, "%s:%d: incomplete compacted ktable, missing %.*s\n",
__FILE__, __LINE__, (int) len, name);
}
mapping[idx] = newidx;
}
update_capture_keys(p->tree, mapping, idx);
ktable_free(p->kt);
p->kt = ckt;
free(mapping);
}
static Instruction *prepcompile (lua_State *L, Pattern *p) {
finalfix(L, 0, NULL, p->tree, p->kt);
compact_ktable(L, p);
Instruction *code = compile(L, p);
if (!code)
luaL_error(L, "cannot compile expression containing a precompiled pattern");
return code;
}
static int lp_printtree (lua_State *L) {
TTree *tree = getpatt(L, 1, NULL);
Pattern *p = (Pattern *)luaL_checkudata(L, 1, PATTERN_T);
prepcompile(L, p);
int c = lua_toboolean(L, 2);
if (c) printktable(p->kt);
printtree(tree, 0);
return 0;
}
static int lp_printcode (lua_State *L) {
Pattern *p = getpattern(L, 1);
printktable(p->kt);
if (p->code == NULL)
prepcompile(L, p);
printpatt(p->code, p->codesize);
return 0;
}
static int lp_make_compiled_pattern (lua_State *L, int nsize, Instruction *code, Ktable *kt) {
Pattern *p = (Pattern *)lua_newuserdata(L, sizeof(TTree) + sizeof(Pattern));
p->code = code;
p->codesize = nsize;
p->tree->tag = TNoTree;
p->kt = kt;
luaL_getmetatable(L, PATTERN_T);
lua_setmetatable(L, -2);
return 1;
}
static int lp_loadRPLX (lua_State *L) {
const char *filename;
filename = lua_tostring(L, 1);
Chunk chunk;
int err = file_load(filename, &chunk);
if (err) {
if ((err > 0) && (err < FILE_ERR_SENTINEL))
luaL_error(L, FILE_MESSAGES[err]);
else
luaL_error(L, "unknown error in file_load (please report this as a bug)");
}
return lp_make_compiled_pattern(L, chunk.codesize, chunk.code, chunk.ktable);
}
static int lp_saveRPLX (lua_State *L) {
const char *filename;
filename = lua_tostring(L, 2);
Pattern *p = getpattern(L, 1);
finalfix(L, 0, NULL, p->tree, p->kt);
if (p->code == NULL)
prepcompile(L, p);
Chunk chunk;
chunk.code = p->code;
chunk.codesize = p->codesize;
chunk.ktable = p->kt;
int err = file_save(filename, &chunk);
if (err) {
if ((err > 0) && (err < FILE_ERR_SENTINEL))
luaL_error(L, FILE_MESSAGES[err]);
else
luaL_error(L, "unknown error in file_save (please report this as a bug)");
}
return 0;
}
static Encoder debug_encoder = { debug_Open, debug_Close };
static Encoder byte_encoder = { byte_Open, byte_Close };
static Encoder json_encoder = { json_Open, json_Close };
static Encoder status_encoder = { NULL, NULL };
static int set_encoder (Encoder *encoder, int etype) {
switch (etype) {
case ENCODE_BYTE: {
*encoder = byte_encoder; break;
}
case ENCODE_JSON: {
*encoder = json_encoder; break;
}
case ENCODE_LINE: {
*encoder = status_encoder; break;
}
case ENCODE_STATUS: {
*encoder = status_encoder; break;
}
case ENCODE_DEBUG: {
*encoder = debug_encoder; break;
}
default: {
return 0;
} }
return 1;
}
static uint32_t initposition (int pos, size_t len) {
if (pos == 0) return 1;
if (pos > 0) {
if ((size_t)pos <= len)
return (size_t)pos;
else
return len+1;
}
else {
if ((size_t)(-pos) <= len)
return len - ((size_t)(-pos)) + 1;
else
return 1;
}
}
int r_match_lua (lua_State *L);
int r_match_lua (lua_State *L) {
Pattern *p;
byte_ptr input_ptr;
size_t input_len;
str input;
Chunk chunk;
RBuffer *output;
int start, etype, timeflag;
struct rosie_matchresult match_result;
p = (getpatt(L, 1, NULL), getpattern(L, 1));
chunk.code = (p->code != NULL) ? p->code : prepcompile(L, p);
chunk.codesize = p->codesize;
chunk.ktable = p->kt;
chunk.filename = NULL;
int input_type = lua_type(L, SUBJIDX);
switch (input_type) {
case LUA_TSTRING: {
input_ptr = luaL_checklstring(L, SUBJIDX, &input_len);
break;
}
case LUA_TUSERDATA: {
RBuffer *rbuf = luaL_testudata(L, SUBJIDX, ROSIE_BUFFER);
if (rbuf) {
input_ptr = (*rbuf)->data;
input_len = (*rbuf)->n;
break;
}
}
default:
return luaL_argerror(L, SUBJIDX, "not rosie buffer or lua string");
}
output = luaL_testudata(L, SUBJIDX+1, ROSIE_BUFFER);
assert(output);
lua_Integer duration0, duration1;
start = luaL_optinteger(L, SUBJIDX+2, 1);
etype = luaL_optinteger(L, SUBJIDX+3, ENCODE_BYTE);
if (lua_isnoneornil(L, SUBJIDX+4)) {
duration0 = 0;
duration1 = 0;
timeflag = 0;
} else {
duration0 = luaL_optinteger(L, SUBJIDX+4, 0);
duration1 = luaL_optinteger(L, SUBJIDX+5, 0);
timeflag = 1;
}
Stats stats = {duration0, duration1, 0, 0, 0, 0};
Encoder encoder;
if (!set_encoder(&encoder, etype))
luaL_error(L, "bad encoder value");
buf_reset(*output);
uint32_t startpos = initposition(start, input_len);
assert( startpos >= 1 );
assert( startpos <= input_len +1 );
input = (str) {.ptr = (byte_ptr) input_ptr,
.len = (uint32_t) input_len};
int err = vm_match2(&chunk,
&input, startpos, 0,
encoder, timeflag,
*output,
&match_result);
if (err) {
const char *msg = STRERROR(err, MATCH_MESSAGES);
luaL_error(L, msg);
}
if (etype == ENCODE_LINE) {
if (!buf_prepsize(*output, input_len)) return MATCH_OUT_OF_MEM;
buf_addlstring_UNSAFE(*output, input_ptr, input_len);
}
lua_pushvalue(L, SUBJIDX+1);
if (!match_result.data.ptr && !match_result.data.len)
lua_pushinteger(L, 0);
lua_pushinteger(L, match_result.leftover);
lua_pushboolean(L, match_result.abend);
lua_pushinteger(L, stats.total_time);
lua_pushinteger(L, stats.match_time);
return 5;
}
#define DISPLAY(msg) \
do { fprintf(stderr, "%s:%d:%s(): %s", __FILE__, \
__LINE__, __func__, msg); \
fflush(stderr); \
} while (0)
#define DISPLAYf(fmt, ...) \
do { fprintf(stderr, "%s:%d:%s(): " fmt, __FILE__, \
__LINE__, __func__, __VA_ARGS__); \
fflush(stderr); \
} while (0)
int r_match_C2 (void *pattern_as_void_ptr,
struct rosie_string *input, uint32_t startpos, uint32_t endpos,
uint8_t etype, uint8_t collect_times,
Buffer *output, struct rosie_matchresult *match_result) {
int err;
Chunk chunk;
Encoder encoder;
if (!pattern_as_void_ptr) return MATCH_ERR_NULL_PATTERN;
if (!input) return MATCH_ERR_NULL_INPUT;
if (!output) return MATCH_ERR_NULL_OUTPUT;
if (!match_result) return MATCH_ERR_NULL_MATCHRESULT;
Pattern *p = (Pattern *) pattern_as_void_ptr;
if (p->code == NULL) {
LOG("internal error: code generator has not run?\n");
return MATCH_IMPL_ERROR;
}
chunk.code = p->code;
chunk.codesize = p->codesize;
chunk.ktable = p->kt;
chunk.filename = NULL;
if (!set_encoder(&encoder, etype))
return MATCH_INVALID_ENCODER;
buf_reset(output);
err = vm_match2(&chunk,
input, startpos, endpos,
encoder,
collect_times,
output,
match_result);
if (err != 0) return err;
if (etype == ENCODE_LINE) {
assert(match_result->data.ptr == NULL);
if (match_result->data.len == MATCH_WITHOUT_DATA) {
assert( output->n == 0 );
if (!buf_prepsize(output, (size_t) input->len)) return MATCH_OUT_OF_MEM;
buf_addlstring_UNSAFE(output, input->ptr, (size_t) input->len);
match_result->data.ptr = output->data;
match_result->data.len = output->n;
}
}
return MATCH_OK;
}
static int lp_version (lua_State *L) {
lua_pushstring(L, VERSION);
return 1;
}
static int lp_type (lua_State *L) {
if (testpattern(L, 1))
lua_pushliteral(L, "pattern");
else
lua_pushnil(L);
return 1;
}
int lp_gc (lua_State *L) {
Pattern *p = getpattern(L, 1);
if (p->kt) {
ktable_free(p->kt);
}
realloccode(L, p, 0);
return 0;
}
static struct luaL_Reg pattreg[] = {
{"ptree", lp_printtree},
{"pcode", lp_printcode},
{"B", lp_behind},
{"V", lp_V},
{"P", lp_P},
{"S", lp_set},
{"R", lp_range},
{"version", lp_version},
{"type", lp_type},
{"Halt", lp_halt},
{"saveRPLX", lp_saveRPLX},
{"loadRPLX", lp_loadRPLX},
{"usize", r_userdata_size},
{"psize", r_pattern_size},
{"codegen", r_codegen_if_needed},
{"rcap", r_capture},
{"rconstcap", r_constcapture},
{"Br", r_backref},
{"rmatch", r_match_lua},
{"newbuffer", r_lua_newbuffer},
{"getdata", r_lua_getdata},
{"writedata", r_lua_writedata},
{"add", r_lua_add},
{"decode", r_lua_decode},
{NULL, NULL}
};
static struct luaL_Reg metareg[] = {
{"__mul", lp_seq},
{"__add", lp_choice},
{"__pow", lp_star},
{"__gc", lp_gc},
{"__len", lp_and},
{"__unm", lp_not},
{"__sub", lp_sub},
{NULL, NULL}
};
int luaopen_lpeg (lua_State *L);
int luaopen_lpeg (lua_State *L) {
luaL_newmetatable(L, PATTERN_T);
luaL_setfuncs(L, metareg, 0);
luaL_newlib(L, pattreg);
lua_pushvalue(L, -1);
lua_setfield(L, -3, "__index");
return 1;
}