#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
/* Names that generic helper macros resolve to inside this file. NLBLOCK
expands to "md", the match-data pointer parameter of internal_dfa_exec(); the
matcher reads newline settings through it (NLBLOCK->nltype, NLBLOCK->nllen,
NLBLOCK->nl[0]). PSSTART and PSEND expand to the local variables
start_subject and end_subject of internal_dfa_exec().
NOTE(review): PSSTART/PSEND are presumably consumed by the newline-test
macros (IS_NEWLINE / WAS_NEWLINE) supplied by pcre_internal.h - confirm
against that header. */
#define NLBLOCK md
#define PSSTART start_subject
#define PSEND end_subject
#include "pcre_internal.h"
/* SP is the string handed to the "%.*s" conversions in the debugging output,
with a precision of rlevel*2-2, to indent messages by recursion level. A
precision only limits how many characters of the string are written, so the
indent can never be longer than SP itself.
NOTE(review): SP is a single space here, which caps the indent at one
character for every level - confirm whether a longer run of spaces was
intended. */
#define SP " "
/* Offsets added to a repeat opcode (codevalue >= OP_TYPESTAR) when the
repeated item is one that needs special handling: OP_PROP/OP_NOTPROP,
OP_EXTUNI, OP_ANYNL, OP_HSPACE/OP_NOT_HSPACE, OP_VSPACE/OP_NOT_VSPACE. The sum
(for example OP_PROP_EXTRA + OP_TYPEPLUS) is what the main switch in
internal_dfa_exec() dispatches on, giving each combination its own case.
NOTE(review): the values are presumably chosen to lie above every real opcode
and far enough apart that the sums cannot collide - verify against the opcode
list in pcre_internal.h. */
#define OP_PROP_EXTRA 300
#define OP_EXTUNI_EXTRA 320
#define OP_ANYNL_EXTRA 340
#define OP_HSPACE_EXTRA 360
#define OP_VSPACE_EXTRA 380
/* Table indexed by opcode. A non-zero entry is the offset, in code units from
the opcode itself, of the data character (or repeated item) that the opcode
operates on; internal_dfa_exec() uses it to load that item into "d" before
dispatching. A zero entry means the opcode has no such item, in which case "d"
is set to NOTACHAR. Entries written as 1+IMM2_SIZE skip an IMM2_SIZE-wide
operand that sits between the opcode and the item.

The main switch in internal_dfa_exec() contains a compile-time check (a
deliberately duplicated case label) that fails unless sizeof(coptable) and
sizeof(poptable) both equal OP_TABLE_LENGTH, so entries must be added or
removed in step with the opcode list.

NOTE(review): the per-row labels below assume the opcode order declared in
pcre_internal.h, which is not visible from this file - confirm them against
that header whenever opcodes change. */
static const pcre_uint8 coptable[] = {
0,                             /* End                                    */
0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
0, 0, 0,                       /* Any, AllAny, Anybyte                   */
0, 0,                          /* \P, \p                                 */
0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
0,                             /* \X                                     */
0, 0, 0, 0, 0, 0,              /* \Z, \z, $, $M, ^, ^M                   */
1,                             /* Char                                   */
1,                             /* Chari                                  */
1,                             /* not                                    */
1,                             /* noti                                   */
/* Positive single-char repeats                                          */
1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto, minupto                          */
1+IMM2_SIZE,                   /* exact                                  */
1, 1, 1, 1+IMM2_SIZE,          /* *+, ++, ?+, upto+                      */
1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto I, minupto I                      */
1+IMM2_SIZE,                   /* exact I                                */
1, 1, 1, 1+IMM2_SIZE,          /* *+I, ++I, ?+I, upto+I                  */
/* Negative single-char repeats                                          */
1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto, minupto                      */
1+IMM2_SIZE,                   /* NOT exact                              */
1, 1, 1, 1+IMM2_SIZE,          /* NOT *+, ++, ?+, upto+                  */
1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto I, minupto I                  */
1+IMM2_SIZE,                   /* NOT exact I                            */
1, 1, 1, 1+IMM2_SIZE,          /* NOT *+I, ++I, ?+I, upto+I              */
/* Character type repeats                                                */
1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
1+IMM2_SIZE, 1+IMM2_SIZE,      /* Type upto, minupto                     */
1+IMM2_SIZE,                   /* Type exact                             */
1, 1, 1, 1+IMM2_SIZE,          /* Type *+, ++, ?+, upto+                 */
/* Character class & ref repeats                                         */
0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
0, 0,                          /* CRRANGE, CRMINRANGE                    */
0, 0, 0, 0,                    /* Possessive *+, ++, ?+, CRPOSRANGE      */
0,                             /* CLASS                                  */
0,                             /* NCLASS                                 */
0,                             /* XCLASS - variable length               */
0,                             /* REF                                    */
0,                             /* REFI                                   */
0,                             /* DNREF                                  */
0,                             /* DNREFI                                 */
0,                             /* RECURSE                                */
0,                             /* CALLOUT                                */
0,                             /* Alt                                    */
0,                             /* Ket                                    */
0,                             /* KetRmax                                */
0,                             /* KetRmin                                */
0,                             /* KetRpos                                */
0,                             /* Reverse                                */
0,                             /* Assert                                 */
0,                             /* Assert not                             */
0,                             /* Assert behind                          */
0,                             /* Assert behind not                      */
0, 0,                          /* ONCE, ONCE_NC                          */
0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
0, 0,                          /* CREF, DNCREF                           */
0, 0,                          /* RREF, DNRREF                           */
0,                             /* DEF                                    */
0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
0, 0                           /* CLOSE, SKIPZERO                        */
};
/* Table indexed by opcode. When internal_dfa_exec() reaches the end of the
subject (no current character) and the entry for the state's opcode is
non-zero, it sets its "could_continue" flag: a non-zero entry therefore marks
an opcode for which running out of subject means the state might still have
gone further had more input been available.
NOTE(review): could_continue is presumably what drives partial-match
reporting; the code that consumes it is outside the part of the function
examined here - confirm.

The main switch in internal_dfa_exec() contains a compile-time check (a
deliberately duplicated case label) that fails unless sizeof(coptable) and
sizeof(poptable) both equal OP_TABLE_LENGTH, so this table must stay the same
length as coptable and the opcode list.

NOTE(review): the per-row labels below assume the opcode order declared in
pcre_internal.h, which is not visible from this file - confirm them against
that header whenever opcodes change. */
static const pcre_uint8 poptable[] = {
0,                             /* End                                    */
0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
1, 1, 1,                       /* Any, AllAny, Anybyte                   */
1, 1,                          /* \P, \p                                 */
1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */
1,                             /* \X                                     */
0, 0, 0, 0, 0, 0,              /* \Z, \z, $, $M, ^, ^M                   */
1,                             /* Char                                   */
1,                             /* Chari                                  */
1,                             /* not                                    */
1,                             /* noti                                   */
/* Positive single-char repeats                                          */
1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
1, 1, 1,                       /* upto, minupto, exact                   */
1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */
1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
1, 1, 1,                       /* upto I, minupto I, exact I             */
1, 1, 1, 1,                    /* *+I, ++I, ?+I, upto+I                  */
/* Negative single-char repeats                                          */
1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
1, 1, 1,                       /* NOT upto, minupto, exact               */
1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */
1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
1, 1, 1,                       /* NOT upto I, minupto I, exact I         */
1, 1, 1, 1,                    /* NOT *+I, ++I, ?+I, upto+I              */
/* Character type repeats                                                */
1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
1, 1, 1,                       /* Type upto, minupto, exact              */
1, 1, 1, 1,                    /* Type *+, ++, ?+, upto+                 */
/* Character class & ref repeats                                         */
1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
1, 1,                          /* CRRANGE, CRMINRANGE                    */
1, 1, 1, 1,                    /* Possessive *+, ++, ?+, CRPOSRANGE      */
1,                             /* CLASS                                  */
1,                             /* NCLASS                                 */
1,                             /* XCLASS - variable length               */
0,                             /* REF                                    */
0,                             /* REFI                                   */
0,                             /* DNREF                                  */
0,                             /* DNREFI                                 */
0,                             /* RECURSE                                */
0,                             /* CALLOUT                                */
0,                             /* Alt                                    */
0,                             /* Ket                                    */
0,                             /* KetRmax                                */
0,                             /* KetRmin                                */
0,                             /* KetRpos                                */
0,                             /* Reverse                                */
0,                             /* Assert                                 */
0,                             /* Assert not                             */
0,                             /* Assert behind                          */
0,                             /* Assert behind not                      */
0, 0,                          /* ONCE, ONCE_NC                          */
0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
0, 0,                          /* CREF, DNCREF                           */
0, 0,                          /* RREF, DNRREF                           */
0,                             /* DEF                                    */
0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
0, 0                           /* CLOSE, SKIPZERO                        */
};
/* AND-mask half of the character-type test. internal_dfa_exec() decides
whether a character c (c < 256) satisfies a type opcode "op" by evaluating

  ((ctypes[c] & toptable1[op]) ^ toptable2[op]) != 0

so this table selects which ctype bit is examined for each opcode, and
toptable2 decides which state of that bit counts as a match. A mask of 0
discards the ctype bits entirely, leaving the result to toptable2 alone.
NOTE(review): the index is an opcode value; the layout presumably corresponds
to six unused leading opcodes, then \D, \d, \S, \s, \W, \w, then OP_ANY and
OP_ALLANY - confirm against the opcode order in pcre_internal.h. */
static const pcre_uint8 toptable1[] = {
0, 0, 0, 0, 0, 0,
ctype_digit, ctype_digit,
ctype_space, ctype_space,
ctype_word, ctype_word,
0, 0                            /* mask nothing in: result comes from toptable2 */
};
/* XOR half of the character-type test. internal_dfa_exec() decides whether a
character c (c < 256) satisfies a type opcode "op" by evaluating

  ((ctypes[c] & toptable1[op]) ^ toptable2[op]) != 0

Where this entry repeats the toptable1 mask, the XOR inverts the masked bit,
so the test succeeds when the ctype bit is clear (a negated type). Where this
entry is 0 and the mask is non-zero, the test succeeds when the bit is set.
Where the mask is 0 and this entry is 1, the test always succeeds; where both
are 0 it never does.
NOTE(review): the index is an opcode value; the layout presumably corresponds
to six unused leading opcodes, then \D, \d, \S, \s, \W, \w, then OP_ANY and
OP_ALLANY - confirm against the opcode order in pcre_internal.h. */
static const pcre_uint8 toptable2[] = {
0, 0, 0, 0, 0, 0,
ctype_digit, 0,
ctype_space, 0,
ctype_word, 0,
1, 1                            /* with a zero mask: always matches */
};
/* One entry in the matcher's active-state or new-state list. The lists live
in the caller-supplied int workspace, which internal_dfa_exec() casts to
stateblock pointers, so the structure must consist of ints only. */
typedef struct stateblock {
int offset;                     /* Offset of the state's opcode from start_code;
                                   stored negated when the state is created
                                   with a count of characters still to skip */
int count;                      /* Repeat count carried by the state */
int data;                       /* Extra per-state value; for a negated offset
                                   it is the number of characters to skip */
} stateblock;
/* Number of ints occupied by one stateblock; used to work out how many state
blocks fit into the workspace. */
#define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int))
#ifdef PCRE_DEBUG
/*************************************************
*   Print a run of code units (debugging only)   *
*************************************************/

/* Writes "length" code units, starting at p, to the stream f. A unit that is
a printable character is written as itself; anything else is written as an
escape of the form \x{hh}.

Arguments:
  p           points to the first code unit
  length      number of code units to write (nothing is written if <= 0)
  f           where to write

Returns:      nothing
*/

static void
pchars(const pcre_uchar *p, int length, FILE *f)
{
pcre_uint32 c;
while (length-- > 0)
  {
  c = *(p++);
  /* isprint() is defined only for arguments representable as unsigned char
  (or EOF). A code unit wider than 8 bits can fall outside that range, so
  filter on the value first and send anything larger down the escape path. */
  if (c <= 0xff && isprint((int)c))
    fprintf(f, "%c", (int)c);
  else
    fprintf(f, "\\x{%02x}", c);
  }
}
#endif
/* ADD_ACTIVE(x,y): append a state with opcode offset x and repeat count y to
the end of the active-state list, so that it is processed later in the current
pass. The "data" field of the slot is not written and keeps whatever value it
already held. If the list already holds wscount entries, the ENCLOSING
FUNCTION returns PCRE_ERROR_DFA_WSSIZE.

The macro relies on these names being in scope at the point of use:
active_count, wscount, next_active_state and (for debug output) rlevel. It
expands to an unbraced if/else statement with no terminating semicolon; the
caller supplies the semicolon, and call sites that sit under another "if"
wrap the invocation in braces so the "else" cannot bind to the wrong "if". */
#define ADD_ACTIVE(x,y) \
if (active_count++ < wscount) \
{ \
next_active_state->offset = (x); \
next_active_state->count = (y); \
next_active_state++; \
DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
} \
else return PCRE_ERROR_DFA_WSSIZE
/* ADD_ACTIVE_DATA(x,y,z): append a state with opcode offset x, repeat count y
and extra data value z to the end of the active-state list. If the list
already holds wscount entries, the ENCLOSING FUNCTION returns
PCRE_ERROR_DFA_WSSIZE.

The macro relies on these names being in scope at the point of use:
active_count, wscount, next_active_state and (for debug output) rlevel. It
expands to an unbraced if/else statement with no terminating semicolon, so it
should be braced when used under another "if". */
#define ADD_ACTIVE_DATA(x,y,z) \
if (active_count++ < wscount) \
{ \
next_active_state->offset = (x); \
next_active_state->count = (y); \
next_active_state->data = (z); \
next_active_state++; \
DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
} \
else return PCRE_ERROR_DFA_WSSIZE
/* ADD_NEW(x,y): append a state with opcode offset x and repeat count y to the
new-state list, i.e. the list that becomes the active list on the next pass of
the matcher's main loop. The "data" field of the slot is not written and keeps
whatever value it already held. If the list already holds wscount entries, the
ENCLOSING FUNCTION returns PCRE_ERROR_DFA_WSSIZE.

The macro relies on these names being in scope at the point of use:
new_count, wscount, next_new_state and (for debug output) rlevel. It expands
to an unbraced if/else statement with no terminating semicolon, so it should
be braced when used under another "if". */
#define ADD_NEW(x,y) \
if (new_count++ < wscount) \
{ \
next_new_state->offset = (x); \
next_new_state->count = (y); \
next_new_state++; \
DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
} \
else return PCRE_ERROR_DFA_WSSIZE
/* ADD_NEW_DATA(x,y,z): append a state with opcode offset x, repeat count y
and extra data value z to the new-state list, i.e. the list that becomes the
active list on the next pass of the matcher's main loop. Callers pass a
negated offset together with a character count in z to create a state that
first skips z characters. If the list already holds wscount entries, the
ENCLOSING FUNCTION returns PCRE_ERROR_DFA_WSSIZE.

The macro relies on these names being in scope at the point of use:
new_count, wscount, next_new_state and (for debug output) rlevel; the debug
message also reports the source line of the invocation. It expands to an
unbraced if/else statement with no terminating semicolon, so it should be
braced when used under another "if". */
#define ADD_NEW_DATA(x,y,z) \
if (new_count++ < wscount) \
{ \
next_new_state->offset = (x); \
next_new_state->count = (y); \
next_new_state->data = (z); \
next_new_state++; \
DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
(x), (y), (z), __LINE__)); \
} \
else return PCRE_ERROR_DFA_WSSIZE
static int
internal_dfa_exec(
dfa_match_data *md,
const pcre_uchar *this_start_code,
const pcre_uchar *current_subject,
int start_offset,
int *offsets,
int offsetcount,
int *workspace,
int wscount,
int rlevel)
{
stateblock *active_states, *new_states, *temp_states;
stateblock *next_active_state, *next_new_state;
const pcre_uint8 *ctypes, *lcc, *fcc;
const pcre_uchar *ptr;
const pcre_uchar *end_code, *first_op;
dfa_recursion_info new_recursive;
int active_count, new_count, match_count;
const pcre_uchar *start_subject = md->start_subject;
const pcre_uchar *end_subject = md->end_subject;
const pcre_uchar *start_code = md->start_code;
#ifdef SUPPORT_UTF
BOOL utf = (md->poptions & PCRE_UTF8) != 0;
#else
BOOL utf = FALSE;
#endif
BOOL reset_could_continue = FALSE;
rlevel++;
offsetcount &= (-2);
wscount -= 2;
wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
(2 * INTS_PER_STATEBLOCK);
DPRINTF(("\n%.*s---------------------\n"
"%.*sCall to internal_dfa_exec f=%d\n",
rlevel*2-2, SP, rlevel*2-2, SP, rlevel));
ctypes = md->tables + ctypes_offset;
lcc = md->tables + lcc_offset;
fcc = md->tables + fcc_offset;
match_count = PCRE_ERROR_NOMATCH;
active_states = (stateblock *)(workspace + 2);
next_new_state = new_states = active_states + wscount;
new_count = 0;
first_op = this_start_code + 1 + LINK_SIZE +
((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
*this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
? IMM2_SIZE:0);
if (*first_op == OP_REVERSE)
{
int max_back = 0;
int gone_back;
end_code = this_start_code;
do
{
int back = GET(end_code, 2+LINK_SIZE);
if (back > max_back) max_back = back;
end_code += GET(end_code, 1);
}
while (*end_code == OP_ALT);
#ifdef SUPPORT_UTF
if (utf)
{
for (gone_back = 0; gone_back < max_back; gone_back++)
{
if (current_subject <= start_subject) break;
current_subject--;
ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
}
}
else
#endif
{
gone_back = (current_subject - max_back < start_subject)?
(int)(current_subject - start_subject) : max_back;
current_subject -= gone_back;
}
if (current_subject < md->start_used_ptr)
md->start_used_ptr = current_subject;
end_code = this_start_code;
do
{
int back = GET(end_code, 2+LINK_SIZE);
if (back <= gone_back)
{
int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
ADD_NEW_DATA(-bstate, 0, gone_back - back);
}
end_code += GET(end_code, 1);
}
while (*end_code == OP_ALT);
}
else
{
end_code = this_start_code;
if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0)
{
do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
new_count = workspace[1];
if (!workspace[0])
memcpy(new_states, active_states, new_count * sizeof(stateblock));
}
else
{
int length = 1 + LINK_SIZE +
((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
*this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
? IMM2_SIZE:0);
do
{
ADD_NEW((int)(end_code - start_code + length), 0);
end_code += GET(end_code, 1);
length = 1 + LINK_SIZE;
}
while (*end_code == OP_ALT);
}
}
workspace[0] = 0;
DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
ptr = current_subject;
for (;;)
{
int i, j;
int clen, dlen;
pcre_uint32 c, d;
int forced_fail = 0;
BOOL partial_newline = FALSE;
BOOL could_continue = reset_could_continue;
reset_could_continue = FALSE;
temp_states = active_states;
active_states = new_states;
new_states = temp_states;
active_count = new_count;
new_count = 0;
workspace[0] ^= 1;
workspace[1] = active_count;
#ifdef PCRE_DEBUG
printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
pchars(ptr, STRLEN_UC(ptr), stdout);
printf("\"\n");
printf("%.*sActive states: ", rlevel*2-2, SP);
for (i = 0; i < active_count; i++)
printf("%d/%d ", active_states[i].offset, active_states[i].count);
printf("\n");
#endif
next_active_state = active_states + active_count;
next_new_state = new_states;
if (ptr < end_subject)
{
clen = 1;
#ifdef SUPPORT_UTF
GETCHARLENTEST(c, ptr, clen);
#else
c = *ptr;
#endif
}
else
{
clen = 0;
c = NOTACHAR;
}
for (i = 0; i < active_count; i++)
{
stateblock *current_state = active_states + i;
BOOL caseless = FALSE;
const pcre_uchar *code;
int state_offset = current_state->offset;
int codevalue, rrc;
int count;
#ifdef PCRE_DEBUG
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
if (clen == 0) printf("EOL\n");
else if (c > 32 && c < 127) printf("'%c'\n", c);
else printf("0x%02x\n", c);
#endif
if (state_offset < 0)
{
if (current_state->data > 0)
{
DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
ADD_NEW_DATA(state_offset, current_state->count,
current_state->data - 1);
if (could_continue) reset_could_continue = TRUE;
continue;
}
else
{
current_state->offset = state_offset = -state_offset;
}
}
for (j = 0; j < i; j++)
{
if (active_states[j].offset == state_offset &&
active_states[j].count == current_state->count)
{
DPRINTF(("%.*sDuplicate state: skipped\n", rlevel*2-2, SP));
goto NEXT_ACTIVE_STATE;
}
}
code = start_code + state_offset;
codevalue = *code;
if (clen == 0 && poptable[codevalue] != 0)
could_continue = TRUE;
if (coptable[codevalue] > 0)
{
dlen = 1;
#ifdef SUPPORT_UTF
if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
#endif
d = code[coptable[codevalue]];
if (codevalue >= OP_TYPESTAR)
{
switch(d)
{
case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
case OP_NOTPROP:
case OP_PROP: codevalue += OP_PROP_EXTRA; break;
case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
case OP_NOT_HSPACE:
case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
case OP_NOT_VSPACE:
case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
default: break;
}
}
}
else
{
dlen = 0;
d = NOTACHAR;
}
switch (codevalue)
{
case OP_TABLE_LENGTH:
case OP_TABLE_LENGTH +
((sizeof(coptable) == OP_TABLE_LENGTH) &&
(sizeof(poptable) == OP_TABLE_LENGTH)):
break;
case OP_KET:
case OP_KETRMIN:
case OP_KETRMAX:
case OP_KETRPOS:
if (code != end_code)
{
ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
if (codevalue != OP_KET)
{
ADD_ACTIVE(state_offset - GET(code, 1), 0);
}
}
else
{
if (ptr > current_subject ||
((md->moptions & PCRE_NOTEMPTY) == 0 &&
((md->moptions & PCRE_NOTEMPTY_ATSTART) == 0 ||
current_subject > start_subject + md->start_offset)))
{
if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
else if (match_count > 0 && ++match_count * 2 > offsetcount)
match_count = 0;
count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
if (offsetcount >= 2)
{
offsets[0] = (int)(current_subject - start_subject);
offsets[1] = (int)(ptr - start_subject);
DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
offsets[1] - offsets[0], (char *)current_subject));
}
if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
{
DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
"%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
match_count, rlevel*2-2, SP));
return match_count;
}
}
}
break;
case OP_ALT:
do { code += GET(code, 1); } while (*code == OP_ALT);
ADD_ACTIVE((int)(code - start_code), 0);
break;
case OP_BRA:
case OP_SBRA:
do
{
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
code += GET(code, 1);
}
while (*code == OP_ALT);
break;
case OP_CBRA:
case OP_SCBRA:
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE), 0);
code += GET(code, 1);
while (*code == OP_ALT)
{
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
code += GET(code, 1);
}
break;
case OP_BRAZERO:
case OP_BRAMINZERO:
ADD_ACTIVE(state_offset + 1, 0);
code += 1 + GET(code, 2);
while (*code == OP_ALT) code += GET(code, 1);
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
break;
case OP_SKIPZERO:
code += 1 + GET(code, 2);
while (*code == OP_ALT) code += GET(code, 1);
ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
break;
case OP_CIRC:
if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0)
{ ADD_ACTIVE(state_offset + 1, 0); }
break;
case OP_CIRCM:
if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
(ptr != end_subject && WAS_NEWLINE(ptr)))
{ ADD_ACTIVE(state_offset + 1, 0); }
break;
case OP_EOD:
if (ptr >= end_subject)
{
if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
could_continue = TRUE;
else { ADD_ACTIVE(state_offset + 1, 0); }
}
break;
case OP_SOD:
if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
break;
case OP_SOM:
if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
break;
case OP_ANY:
if (clen > 0 && !IS_NEWLINE(ptr))
{
if (ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
could_continue = partial_newline = TRUE;
}
else
{
ADD_NEW(state_offset + 1, 0);
}
}
break;
case OP_ALLANY:
if (clen > 0)
{ ADD_NEW(state_offset + 1, 0); }
break;
case OP_EODN:
if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
could_continue = TRUE;
else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
{ ADD_ACTIVE(state_offset + 1, 0); }
break;
case OP_DOLL:
if ((md->moptions & PCRE_NOTEOL) == 0)
{
if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
could_continue = TRUE;
else if (clen == 0 ||
((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
(ptr == end_subject - md->nllen)
))
{ ADD_ACTIVE(state_offset + 1, 0); }
else if (ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
{
reset_could_continue = TRUE;
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
}
else could_continue = partial_newline = TRUE;
}
}
break;
case OP_DOLLM:
if ((md->moptions & PCRE_NOTEOL) == 0)
{
if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
could_continue = TRUE;
else if (clen == 0 ||
((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
{ ADD_ACTIVE(state_offset + 1, 0); }
else if (ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
{
reset_could_continue = TRUE;
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
}
else could_continue = partial_newline = TRUE;
}
}
else if (IS_NEWLINE(ptr))
{ ADD_ACTIVE(state_offset + 1, 0); }
break;
case OP_DIGIT:
case OP_WHITESPACE:
case OP_WORDCHAR:
if (clen > 0 && c < 256 &&
((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
{ ADD_NEW(state_offset + 1, 0); }
break;
case OP_NOT_DIGIT:
case OP_NOT_WHITESPACE:
case OP_NOT_WORDCHAR:
if (clen > 0 && (c >= 256 ||
((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
{ ADD_NEW(state_offset + 1, 0); }
break;
case OP_WORD_BOUNDARY:
case OP_NOT_WORD_BOUNDARY:
{
int left_word, right_word;
if (ptr > start_subject)
{
const pcre_uchar *temp = ptr - 1;
if (temp < md->start_used_ptr) md->start_used_ptr = temp;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf) { BACKCHAR(temp); }
#endif
GETCHARTEST(d, temp);
#ifdef SUPPORT_UCP
if ((md->poptions & PCRE_UCP) != 0)
{
if (d == '_') left_word = TRUE; else
{
int cat = UCD_CATEGORY(d);
left_word = (cat == ucp_L || cat == ucp_N);
}
}
else
#endif
left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
}
else left_word = FALSE;
if (clen > 0)
{
#ifdef SUPPORT_UCP
if ((md->poptions & PCRE_UCP) != 0)
{
if (c == '_') right_word = TRUE; else
{
int cat = UCD_CATEGORY(c);
right_word = (cat == ucp_L || cat == ucp_N);
}
}
else
#endif
right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
}
else right_word = FALSE;
if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
{ ADD_ACTIVE(state_offset + 1, 0); }
}
break;
#ifdef SUPPORT_UCP
case OP_PROP:
case OP_NOTPROP:
if (clen > 0)
{
BOOL OK;
const pcre_uint32 *cp;
const ucd_record * prop = GET_UCD(c);
switch(code[1])
{
case PT_ANY:
OK = TRUE;
break;
case PT_LAMP:
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
prop->chartype == ucp_Lt;
break;
case PT_GC:
OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
break;
case PT_PC:
OK = prop->chartype == code[2];
break;
case PT_SC:
OK = prop->script == code[2];
break;
case PT_ALNUM:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
break;
case PT_SPACE:
case PT_PXSPACE:
switch(c)
{
HSPACE_CASES:
VSPACE_CASES:
OK = TRUE;
break;
default:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
break;
}
break;
case PT_WORD:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
c == CHAR_UNDERSCORE;
break;
case PT_CLIST:
cp = PRIV(ucd_caseless_sets) + code[2];
for (;;)
{
if (c < *cp) { OK = FALSE; break; }
if (c == *cp++) { OK = TRUE; break; }
}
break;
case PT_UCNC:
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
c >= 0xe000;
break;
default:
OK = codevalue != OP_PROP;
break;
}
if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
}
break;
#endif
case OP_TYPEPLUS:
case OP_TYPEMINPLUS:
case OP_TYPEPOSPLUS:
count = current_state->count;
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
if (clen > 0)
{
if (d == OP_ANY && ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
could_continue = partial_newline = TRUE;
}
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
{
if (count > 0 && codevalue == OP_TYPEPOSPLUS)
{
active_count--;
next_active_state--;
}
count++;
ADD_NEW(state_offset, count);
}
}
break;
case OP_TYPEQUERY:
case OP_TYPEMINQUERY:
case OP_TYPEPOSQUERY:
ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0)
{
if (d == OP_ANY && ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
could_continue = partial_newline = TRUE;
}
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
{
if (codevalue == OP_TYPEPOSQUERY)
{
active_count--;
next_active_state--;
}
ADD_NEW(state_offset + 2, 0);
}
}
break;
case OP_TYPESTAR:
case OP_TYPEMINSTAR:
case OP_TYPEPOSSTAR:
ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0)
{
if (d == OP_ANY && ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
could_continue = partial_newline = TRUE;
}
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
{
if (codevalue == OP_TYPEPOSSTAR)
{
active_count--;
next_active_state--;
}
ADD_NEW(state_offset, 0);
}
}
break;
case OP_TYPEEXACT:
count = current_state->count;
if (clen > 0)
{
if (d == OP_ANY && ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
could_continue = partial_newline = TRUE;
}
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
{
if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
else
{ ADD_NEW(state_offset, count); }
}
}
break;
case OP_TYPEUPTO:
case OP_TYPEMINUPTO:
case OP_TYPEPOSUPTO:
ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
count = current_state->count;
if (clen > 0)
{
if (d == OP_ANY && ptr + 1 >= md->end_subject &&
(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
c == NLBLOCK->nl[0])
{
could_continue = partial_newline = TRUE;
}
else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
{
if (codevalue == OP_TYPEPOSUPTO)
{
active_count--;
next_active_state--;
}
if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
else
{ ADD_NEW(state_offset, count); }
}
}
break;
#ifdef SUPPORT_UCP
case OP_PROP_EXTRA + OP_TYPEPLUS:
case OP_PROP_EXTRA + OP_TYPEMINPLUS:
case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
count = current_state->count;
if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
if (clen > 0)
{
BOOL OK;
const pcre_uint32 *cp;
const ucd_record * prop = GET_UCD(c);
switch(code[2])
{
case PT_ANY:
OK = TRUE;
break;
case PT_LAMP:
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
prop->chartype == ucp_Lt;
break;
case PT_GC:
OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
break;
case PT_PC:
OK = prop->chartype == code[3];
break;
case PT_SC:
OK = prop->script == code[3];
break;
case PT_ALNUM:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
break;
case PT_SPACE:
case PT_PXSPACE:
switch(c)
{
HSPACE_CASES:
VSPACE_CASES:
OK = TRUE;
break;
default:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
break;
}
break;
case PT_WORD:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
c == CHAR_UNDERSCORE;
break;
case PT_CLIST:
cp = PRIV(ucd_caseless_sets) + code[3];
for (;;)
{
if (c < *cp) { OK = FALSE; break; }
if (c == *cp++) { OK = TRUE; break; }
}
break;
case PT_UCNC:
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
c >= 0xe000;
break;
default:
OK = codevalue != OP_PROP;
break;
}
if (OK == (d == OP_PROP))
{
if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
{
active_count--;
next_active_state--;
}
count++;
ADD_NEW(state_offset, count);
}
}
break;
case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
count = current_state->count;
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
if (clen > 0)
{
int lgb, rgb;
const pcre_uchar *nptr = ptr + clen;
int ncount = 0;
if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
{
active_count--;
next_active_state--;
}
lgb = UCD_GRAPHBREAK(c);
while (nptr < end_subject)
{
dlen = 1;
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
rgb = UCD_GRAPHBREAK(d);
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
ncount++;
lgb = rgb;
nptr += dlen;
}
count++;
ADD_NEW_DATA(-state_offset, count, ncount);
}
break;
#endif
case OP_ANYNL_EXTRA + OP_TYPEPLUS:
case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
count = current_state->count;
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
if (clen > 0)
{
int ncount = 0;
switch (c)
{
case CHAR_VT:
case CHAR_FF:
case CHAR_NEL:
#ifndef EBCDIC
case 0x2028:
case 0x2029:
#endif
if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
goto ANYNL01;
case CHAR_CR:
if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
ANYNL01:
case CHAR_LF:
if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
{
active_count--;
next_active_state--;
}
count++;
ADD_NEW_DATA(-state_offset, count, ncount);
break;
default:
break;
}
}
break;
case OP_VSPACE_EXTRA + OP_TYPEPLUS:
case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
count = current_state->count;
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
if (clen > 0)
{
BOOL OK;
switch (c)
{
VSPACE_CASES:
OK = TRUE;
break;
default:
OK = FALSE;
break;
}
if (OK == (d == OP_VSPACE))
{
if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
{
active_count--;
next_active_state--;
}
count++;
ADD_NEW_DATA(-state_offset, count, 0);
}
}
break;
case OP_HSPACE_EXTRA + OP_TYPEPLUS:
case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
count = current_state->count;
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
if (clen > 0)
{
BOOL OK;
switch (c)
{
HSPACE_CASES:
OK = TRUE;
break;
default:
OK = FALSE;
break;
}
if (OK == (d == OP_HSPACE))
{
if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
{
active_count--;
next_active_state--;
}
count++;
ADD_NEW_DATA(-state_offset, count, 0);
}
}
break;
#ifdef SUPPORT_UCP
case OP_PROP_EXTRA + OP_TYPEQUERY:
case OP_PROP_EXTRA + OP_TYPEMINQUERY:
case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
count = 4;
goto QS1;
case OP_PROP_EXTRA + OP_TYPESTAR:
case OP_PROP_EXTRA + OP_TYPEMINSTAR:
case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
count = 0;
QS1:
ADD_ACTIVE(state_offset + 4, 0);
if (clen > 0)
{
BOOL OK;
const pcre_uint32 *cp;
const ucd_record * prop = GET_UCD(c);
switch(code[2])
{
case PT_ANY:
OK = TRUE;
break;
case PT_LAMP:
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
prop->chartype == ucp_Lt;
break;
case PT_GC:
OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
break;
case PT_PC:
OK = prop->chartype == code[3];
break;
case PT_SC:
OK = prop->script == code[3];
break;
case PT_ALNUM:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
break;
case PT_SPACE:
case PT_PXSPACE:
switch(c)
{
HSPACE_CASES:
VSPACE_CASES:
OK = TRUE;
break;
default:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
break;
}
break;
case PT_WORD:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
c == CHAR_UNDERSCORE;
break;
case PT_CLIST:
cp = PRIV(ucd_caseless_sets) + code[3];
for (;;)
{
if (c < *cp) { OK = FALSE; break; }
if (c == *cp++) { OK = TRUE; break; }
}
break;
case PT_UCNC:
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
c >= 0xe000;
break;
default:
OK = codevalue != OP_PROP;
break;
}
if (OK == (d == OP_PROP))
{
if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
{
active_count--;
next_active_state--;
}
ADD_NEW(state_offset + count, 0);
}
}
break;
case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
count = 2;
goto QS2;
case OP_EXTUNI_EXTRA + OP_TYPESTAR:
case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
count = 0;
QS2:
ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0)
{
int lgb, rgb;
const pcre_uchar *nptr = ptr + clen;
int ncount = 0;
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
{
active_count--;
next_active_state--;
}
lgb = UCD_GRAPHBREAK(c);
while (nptr < end_subject)
{
dlen = 1;
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
rgb = UCD_GRAPHBREAK(d);
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
ncount++;
lgb = rgb;
nptr += dlen;
}
ADD_NEW_DATA(-(state_offset + count), 0, ncount);
}
break;
#endif
case OP_ANYNL_EXTRA + OP_TYPEQUERY:
case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
count = 2;
goto QS3;
case OP_ANYNL_EXTRA + OP_TYPESTAR:
case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
count = 0;
QS3:
ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0)
{
int ncount = 0;
switch (c)
{
case CHAR_VT:
case CHAR_FF:
case CHAR_NEL:
#ifndef EBCDIC
case 0x2028:
case 0x2029:
#endif
if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
goto ANYNL02;
case CHAR_CR:
if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
ANYNL02:
case CHAR_LF:
if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
{
active_count--;
next_active_state--;
}
ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
break;
default:
break;
}
}
break;
case OP_VSPACE_EXTRA + OP_TYPEQUERY:
case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
count = 2;
goto QS4;
case OP_VSPACE_EXTRA + OP_TYPESTAR:
case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
count = 0;
QS4:
ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0)
{
BOOL OK;
switch (c)
{
VSPACE_CASES:
OK = TRUE;
break;
default:
OK = FALSE;
break;
}
if (OK == (d == OP_VSPACE))
{
if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
{
active_count--;
next_active_state--;
}
ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
}
}
break;
case OP_HSPACE_EXTRA + OP_TYPEQUERY:
case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
count = 2;
goto QS5;
case OP_HSPACE_EXTRA + OP_TYPESTAR:
case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
count = 0;
QS5:
ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0)
{
BOOL OK;
switch (c)
{
HSPACE_CASES:
OK = TRUE;
break;
default:
OK = FALSE;
break;
}
if (OK == (d == OP_HSPACE))
{
if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
{
active_count--;
next_active_state--;
}
ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
}
}
break;
#ifdef SUPPORT_UCP
case OP_PROP_EXTRA + OP_TYPEEXACT:
case OP_PROP_EXTRA + OP_TYPEUPTO:
case OP_PROP_EXTRA + OP_TYPEMINUPTO:
case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
{ ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
count = current_state->count;
if (clen > 0)
{
BOOL OK;
const pcre_uint32 *cp;
const ucd_record * prop = GET_UCD(c);
switch(code[1 + IMM2_SIZE + 1])
{
case PT_ANY:
OK = TRUE;
break;
case PT_LAMP:
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
prop->chartype == ucp_Lt;
break;
case PT_GC:
OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
break;
case PT_PC:
OK = prop->chartype == code[1 + IMM2_SIZE + 2];
break;
case PT_SC:
OK = prop->script == code[1 + IMM2_SIZE + 2];
break;
case PT_ALNUM:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
break;
case PT_SPACE:
case PT_PXSPACE:
switch(c)
{
HSPACE_CASES:
VSPACE_CASES:
OK = TRUE;
break;
default:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
break;
}
break;
case PT_WORD:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
c == CHAR_UNDERSCORE;
break;
case PT_CLIST:
cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
for (;;)
{
if (c < *cp) { OK = FALSE; break; }
if (c == *cp++) { OK = TRUE; break; }
}
break;
case PT_UCNC:
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
c >= 0xe000;
break;
default:
OK = codevalue != OP_PROP;
break;
}
if (OK == (d == OP_PROP))
{
if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
{
active_count--;
next_active_state--;
}
if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
else
{ ADD_NEW(state_offset, count); }
}
}
break;
case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
count = current_state->count;
if (clen > 0)
{
int lgb, rgb;
const pcre_uchar *nptr = ptr + clen;
int ncount = 0;
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
{
active_count--;
next_active_state--;
}
lgb = UCD_GRAPHBREAK(c);
while (nptr < end_subject)
{
dlen = 1;
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
rgb = UCD_GRAPHBREAK(d);
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
ncount++;
lgb = rgb;
nptr += dlen;
}
if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
reset_could_continue = TRUE;
if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
else
{ ADD_NEW_DATA(-state_offset, count, ncount); }
}
break;
#endif
case OP_ANYNL_EXTRA + OP_TYPEEXACT:
case OP_ANYNL_EXTRA + OP_TYPEUPTO:
case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
count = current_state->count;
if (clen > 0)
{
int ncount = 0;
switch (c)
{
case CHAR_VT:
case CHAR_FF:
case CHAR_NEL:
#ifndef EBCDIC
case 0x2028:
case 0x2029:
#endif
if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
goto ANYNL03;
case CHAR_CR:
if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
ANYNL03:
case CHAR_LF:
if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
{
active_count--;
next_active_state--;
}
if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
else
{ ADD_NEW_DATA(-state_offset, count, ncount); }
break;
default:
break;
}
}
break;
case OP_VSPACE_EXTRA + OP_TYPEEXACT:
case OP_VSPACE_EXTRA + OP_TYPEUPTO:
case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
count = current_state->count;
if (clen > 0)
{
BOOL OK;
switch (c)
{
VSPACE_CASES:
OK = TRUE;
break;
default:
OK = FALSE;
}
if (OK == (d == OP_VSPACE))
{
if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
{
active_count--;
next_active_state--;
}
if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
else
{ ADD_NEW_DATA(-state_offset, count, 0); }
}
}
break;
case OP_HSPACE_EXTRA + OP_TYPEEXACT:
case OP_HSPACE_EXTRA + OP_TYPEUPTO:
case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
count = current_state->count;
if (clen > 0)
{
BOOL OK;
switch (c)
{
HSPACE_CASES:
OK = TRUE;
break;
default:
OK = FALSE;
break;
}
if (OK == (d == OP_HSPACE))
{
if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
{
active_count--;
next_active_state--;
}
if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
else
{ ADD_NEW_DATA(-state_offset, count, 0); }
}
}
break;
case OP_CHAR:
if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
break;
case OP_CHARI:
if (clen == 0) break;
#ifdef SUPPORT_UTF
if (utf)
{
if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
{
unsigned int othercase;
if (c < 128)
othercase = fcc[c];
else
#ifdef SUPPORT_UCP
othercase = UCD_OTHERCASE(c);
#else
othercase = NOTACHAR;
#endif
if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
}
}
else
#endif
{
if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
{ ADD_NEW(state_offset + 2, 0); }
}
break;
#ifdef SUPPORT_UCP
case OP_EXTUNI:
if (clen > 0)
{
int lgb, rgb;
const pcre_uchar *nptr = ptr + clen;
int ncount = 0;
lgb = UCD_GRAPHBREAK(c);
while (nptr < end_subject)
{
dlen = 1;
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
rgb = UCD_GRAPHBREAK(d);
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
ncount++;
lgb = rgb;
nptr += dlen;
}
if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
reset_could_continue = TRUE;
ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
}
break;
#endif
case OP_ANYNL:
if (clen > 0) switch(c)
{
case CHAR_VT:
case CHAR_FF:
case CHAR_NEL:
#ifndef EBCDIC
case 0x2028:
case 0x2029:
#endif
if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
case CHAR_LF:
ADD_NEW(state_offset + 1, 0);
break;
case CHAR_CR:
if (ptr + 1 >= end_subject)
{
ADD_NEW(state_offset + 1, 0);
if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
reset_could_continue = TRUE;
}
else if (UCHAR21TEST(ptr + 1) == CHAR_LF)
{
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
}
else
{
ADD_NEW(state_offset + 1, 0);
}
break;
}
break;
case OP_NOT_VSPACE:
if (clen > 0) switch(c)
{
VSPACE_CASES:
break;
default:
ADD_NEW(state_offset + 1, 0);
break;
}
break;
case OP_VSPACE:
if (clen > 0) switch(c)
{
VSPACE_CASES:
ADD_NEW(state_offset + 1, 0);
break;
default:
break;
}
break;
case OP_NOT_HSPACE:
if (clen > 0) switch(c)
{
HSPACE_CASES:
break;
default:
ADD_NEW(state_offset + 1, 0);
break;
}
break;
case OP_HSPACE:
if (clen > 0) switch(c)
{
HSPACE_CASES:
ADD_NEW(state_offset + 1, 0);
break;
default:
break;
}
break;
case OP_NOT:
if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
break;
case OP_NOTI:
if (clen > 0)
{
pcre_uint32 otherd;
#ifdef SUPPORT_UTF
if (utf && d >= 128)
{
#ifdef SUPPORT_UCP
otherd = UCD_OTHERCASE(d);
#else
otherd = d;
#endif
}
else
#endif
otherd = TABLE_GET(d, fcc, d);
if (c != d && c != otherd)
{ ADD_NEW(state_offset + dlen + 1, 0); }
}
break;
case OP_PLUSI:
case OP_MINPLUSI:
case OP_POSPLUSI:
case OP_NOTPLUSI:
case OP_NOTMINPLUSI:
case OP_NOTPOSPLUSI:
caseless = TRUE;
codevalue -= OP_STARI - OP_STAR;
case OP_PLUS:
case OP_MINPLUS:
case OP_POSPLUS:
case OP_NOTPLUS:
case OP_NOTMINPLUS:
case OP_NOTPOSPLUS:
count = current_state->count;
if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
if (clen > 0)
{
pcre_uint32 otherd = NOTACHAR;
if (caseless)
{
#ifdef SUPPORT_UTF
if (utf && d >= 128)
{
#ifdef SUPPORT_UCP
otherd = UCD_OTHERCASE(d);
#endif
}
else
#endif
otherd = TABLE_GET(d, fcc, d);
}
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
{
if (count > 0 &&
(codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
{
active_count--;
next_active_state--;
}
count++;
ADD_NEW(state_offset, count);
}
}
break;
case OP_QUERYI:
case OP_MINQUERYI:
case OP_POSQUERYI:
case OP_NOTQUERYI:
case OP_NOTMINQUERYI:
case OP_NOTPOSQUERYI:
caseless = TRUE;
codevalue -= OP_STARI - OP_STAR;
case OP_QUERY:
case OP_MINQUERY:
case OP_POSQUERY:
case OP_NOTQUERY:
case OP_NOTMINQUERY:
case OP_NOTPOSQUERY:
ADD_ACTIVE(state_offset + dlen + 1, 0);
if (clen > 0)
{
pcre_uint32 otherd = NOTACHAR;
if (caseless)
{
#ifdef SUPPORT_UTF
if (utf && d >= 128)
{
#ifdef SUPPORT_UCP
otherd = UCD_OTHERCASE(d);
#endif
}
else
#endif
otherd = TABLE_GET(d, fcc, d);
}
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
{
if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
{
active_count--;
next_active_state--;
}
ADD_NEW(state_offset + dlen + 1, 0);
}
}
break;
case OP_STARI:
case OP_MINSTARI:
case OP_POSSTARI:
case OP_NOTSTARI:
case OP_NOTMINSTARI:
case OP_NOTPOSSTARI:
caseless = TRUE;
codevalue -= OP_STARI - OP_STAR;
case OP_STAR:
case OP_MINSTAR:
case OP_POSSTAR:
case OP_NOTSTAR:
case OP_NOTMINSTAR:
case OP_NOTPOSSTAR:
ADD_ACTIVE(state_offset + dlen + 1, 0);
if (clen > 0)
{
pcre_uint32 otherd = NOTACHAR;
if (caseless)
{
#ifdef SUPPORT_UTF
if (utf && d >= 128)
{
#ifdef SUPPORT_UCP
otherd = UCD_OTHERCASE(d);
#endif
}
else
#endif
otherd = TABLE_GET(d, fcc, d);
}
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
{
if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
{
active_count--;
next_active_state--;
}
ADD_NEW(state_offset, 0);
}
}
break;
case OP_EXACTI:
case OP_NOTEXACTI:
caseless = TRUE;
codevalue -= OP_STARI - OP_STAR;
case OP_EXACT:
case OP_NOTEXACT:
count = current_state->count;
if (clen > 0)
{
pcre_uint32 otherd = NOTACHAR;
if (caseless)
{
#ifdef SUPPORT_UTF
if (utf && d >= 128)
{
#ifdef SUPPORT_UCP
otherd = UCD_OTHERCASE(d);
#endif
}
else
#endif
otherd = TABLE_GET(d, fcc, d);
}
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
{
if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
else
{ ADD_NEW(state_offset, count); }
}
}
break;
case OP_UPTOI:
case OP_MINUPTOI:
case OP_POSUPTOI:
case OP_NOTUPTOI:
case OP_NOTMINUPTOI:
case OP_NOTPOSUPTOI:
caseless = TRUE;
codevalue -= OP_STARI - OP_STAR;
case OP_UPTO:
case OP_MINUPTO:
case OP_POSUPTO:
case OP_NOTUPTO:
case OP_NOTMINUPTO:
case OP_NOTPOSUPTO:
ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
count = current_state->count;
if (clen > 0)
{
pcre_uint32 otherd = NOTACHAR;
if (caseless)
{
#ifdef SUPPORT_UTF
if (utf && d >= 128)
{
#ifdef SUPPORT_UCP
otherd = UCD_OTHERCASE(d);
#endif
}
else
#endif
otherd = TABLE_GET(d, fcc, d);
}
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
{
if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
{
active_count--;
next_active_state--;
}
if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
else
{ ADD_NEW(state_offset, count); }
}
}
break;
case OP_CLASS:
case OP_NCLASS:
case OP_XCLASS:
{
BOOL isinclass = FALSE;
int next_state_offset;
const pcre_uchar *ecode;
if (codevalue != OP_XCLASS)
{
ecode = code + 1 + (32 / sizeof(pcre_uchar));
if (clen > 0)
{
isinclass = (c > 255)? (codevalue == OP_NCLASS) :
((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
}
}
else
{
ecode = code + GET(code, 1);
if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
}
next_state_offset = (int)(ecode - start_code);
switch (*ecode)
{
case OP_CRSTAR:
case OP_CRMINSTAR:
case OP_CRPOSSTAR:
ADD_ACTIVE(next_state_offset + 1, 0);
if (isinclass)
{
if (*ecode == OP_CRPOSSTAR)
{
active_count--;
next_active_state--;
}
ADD_NEW(state_offset, 0);
}
break;
case OP_CRPLUS:
case OP_CRMINPLUS:
case OP_CRPOSPLUS:
count = current_state->count;
if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
if (isinclass)
{
if (count > 0 && *ecode == OP_CRPOSPLUS)
{
active_count--;
next_active_state--;
}
count++;
ADD_NEW(state_offset, count);
}
break;
case OP_CRQUERY:
case OP_CRMINQUERY:
case OP_CRPOSQUERY:
ADD_ACTIVE(next_state_offset + 1, 0);
if (isinclass)
{
if (*ecode == OP_CRPOSQUERY)
{
active_count--;
next_active_state--;
}
ADD_NEW(next_state_offset + 1, 0);
}
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
case OP_CRPOSRANGE:
count = current_state->count;
if (count >= (int)GET2(ecode, 1))
{ ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
if (isinclass)
{
int max = (int)GET2(ecode, 1 + IMM2_SIZE);
if (*ecode == OP_CRPOSRANGE && count >= (int)GET2(ecode, 1))
{
active_count--;
next_active_state--;
}
if (++count >= max && max != 0)
{ ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
else
{ ADD_NEW(state_offset, count); }
}
break;
default:
if (isinclass) { ADD_NEW(next_state_offset, 0); }
break;
}
}
break;
case OP_FAIL:
forced_fail++;
break;
case OP_ASSERT:
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
{
int rc;
int local_offsets[2];
int local_workspace[1000];
const pcre_uchar *endasscode = code + GET(code, 1);
while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
rc = internal_dfa_exec(
md,
code,
ptr,
(int)(ptr - start_subject),
local_offsets,
sizeof(local_offsets)/sizeof(int),
local_workspace,
sizeof(local_workspace)/sizeof(int),
rlevel);
if (rc == PCRE_ERROR_DFA_UITEM) return rc;
if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
{ ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
}
break;
case OP_COND:
case OP_SCOND:
{
int local_offsets[1000];
int local_workspace[1000];
int codelink = GET(code, 1);
int condcode;
if (code[LINK_SIZE+1] == OP_CALLOUT)
{
rrc = 0;
if (PUBL(callout) != NULL)
{
PUBL(callout_block) cb;
cb.version = 1;
cb.callout_number = code[LINK_SIZE+2];
cb.offset_vector = offsets;
#if defined COMPILE_PCRE8
cb.subject = (PCRE_SPTR)start_subject;
#elif defined COMPILE_PCRE16
cb.subject = (PCRE_SPTR16)start_subject;
#elif defined COMPILE_PCRE32
cb.subject = (PCRE_SPTR32)start_subject;
#endif
cb.subject_length = (int)(end_subject - start_subject);
cb.start_match = (int)(current_subject - start_subject);
cb.current_position = (int)(ptr - start_subject);
cb.pattern_position = GET(code, LINK_SIZE + 3);
cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
cb.capture_top = 1;
cb.capture_last = -1;
cb.callout_data = md->callout_data;
cb.mark = NULL;
if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;
}
if (rrc > 0) break;
code += PRIV(OP_lengths)[OP_CALLOUT];
}
condcode = code[LINK_SIZE+1];
if (condcode == OP_CREF || condcode == OP_DNCREF ||
condcode == OP_DNRREF)
return PCRE_ERROR_DFA_UCOND;
if (condcode == OP_DEF || condcode == OP_FAIL)
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
else if (condcode == OP_RREF)
{
int value = GET2(code, LINK_SIZE + 2);
if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
if (md->recursive != NULL)
{ ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
}
else
{
int rc;
const pcre_uchar *asscode = code + LINK_SIZE + 1;
const pcre_uchar *endasscode = asscode + GET(asscode, 1);
while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
rc = internal_dfa_exec(
md,
asscode,
ptr,
(int)(ptr - start_subject),
local_offsets,
sizeof(local_offsets)/sizeof(int),
local_workspace,
sizeof(local_workspace)/sizeof(int),
rlevel);
if (rc == PCRE_ERROR_DFA_UITEM) return rc;
if ((rc >= 0) ==
(condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
{ ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
else
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
}
}
break;
case OP_RECURSE:
{
dfa_recursion_info *ri;
int local_offsets[1000];
int local_workspace[1000];
const pcre_uchar *callpat = start_code + GET(code, 1);
int recno = (callpat == md->start_code)? 0 :
GET2(callpat, 1 + LINK_SIZE);
int rc;
DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
if (recno == ri->group_num && ptr == ri->subject_position)
return PCRE_ERROR_RECURSELOOP;
new_recursive.group_num = recno;
new_recursive.subject_position = ptr;
new_recursive.prevrec = md->recursive;
md->recursive = &new_recursive;
rc = internal_dfa_exec(
md,
callpat,
ptr,
(int)(ptr - start_subject),
local_offsets,
sizeof(local_offsets)/sizeof(int),
local_workspace,
sizeof(local_workspace)/sizeof(int),
rlevel);
md->recursive = new_recursive.prevrec;
DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
rc));
if (rc == 0) return PCRE_ERROR_DFA_RECURSE;
if (rc > 0)
{
for (rc = rc*2 - 2; rc >= 0; rc -= 2)
{
int charcount = local_offsets[rc+1] - local_offsets[rc];
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf)
{
const pcre_uchar *p = start_subject + local_offsets[rc];
const pcre_uchar *pp = start_subject + local_offsets[rc+1];
while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
}
#endif
if (charcount > 0)
{
ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
}
else
{
ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
}
}
}
else if (rc != PCRE_ERROR_NOMATCH) return rc;
}
break;
case OP_BRAPOS:
case OP_SBRAPOS:
case OP_CBRAPOS:
case OP_SCBRAPOS:
case OP_BRAPOSZERO:
{
int charcount, matched_count;
const pcre_uchar *local_ptr = ptr;
BOOL allow_zero;
if (codevalue == OP_BRAPOSZERO)
{
allow_zero = TRUE;
codevalue = *(++code);
}
else allow_zero = FALSE;
for (matched_count = 0;; matched_count++)
{
int local_offsets[2];
int local_workspace[1000];
int rc = internal_dfa_exec(
md,
code,
local_ptr,
(int)(ptr - start_subject),
local_offsets,
sizeof(local_offsets)/sizeof(int),
local_workspace,
sizeof(local_workspace)/sizeof(int),
rlevel);
if (rc < 0)
{
if (rc != PCRE_ERROR_NOMATCH) return rc;
break;
}
charcount = local_offsets[1] - local_offsets[0];
if (charcount == 0) break;
local_ptr += charcount;
}
if (matched_count > 0 || allow_zero)
{
const pcre_uchar *end_subpattern = code;
int next_state_offset;
do { end_subpattern += GET(end_subpattern, 1); }
while (*end_subpattern == OP_ALT);
next_state_offset =
(int)(end_subpattern - start_code + LINK_SIZE + 1);
if (i + 1 >= active_count && new_count == 0)
{
ptr = local_ptr;
clen = 0;
ADD_NEW(next_state_offset, 0);
}
else
{
const pcre_uchar *p = ptr;
const pcre_uchar *pp = local_ptr;
charcount = (int)(pp - p);
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
#endif
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
}
}
}
break;
case OP_ONCE:
case OP_ONCE_NC:
{
int local_offsets[2];
int local_workspace[1000];
int rc = internal_dfa_exec(
md,
code,
ptr,
(int)(ptr - start_subject),
local_offsets,
sizeof(local_offsets)/sizeof(int),
local_workspace,
sizeof(local_workspace)/sizeof(int),
rlevel);
if (rc >= 0)
{
const pcre_uchar *end_subpattern = code;
int charcount = local_offsets[1] - local_offsets[0];
int next_state_offset, repeat_state_offset;
do { end_subpattern += GET(end_subpattern, 1); }
while (*end_subpattern == OP_ALT);
next_state_offset =
(int)(end_subpattern - start_code + LINK_SIZE + 1);
repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
*end_subpattern == OP_KETRMIN)?
(int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
if (charcount == 0)
{
ADD_ACTIVE(next_state_offset, 0);
}
else if (i + 1 >= active_count && new_count == 0)
{
ptr += charcount;
clen = 0;
ADD_NEW(next_state_offset, 0);
if (repeat_state_offset >= 0)
{
next_active_state = active_states;
active_count = 0;
i = -1;
ADD_ACTIVE(repeat_state_offset, 0);
}
}
else
{
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf)
{
const pcre_uchar *p = start_subject + local_offsets[0];
const pcre_uchar *pp = start_subject + local_offsets[1];
while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
}
#endif
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
if (repeat_state_offset >= 0)
{ ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
}
}
else if (rc != PCRE_ERROR_NOMATCH) return rc;
}
break;
case OP_CALLOUT:
rrc = 0;
if (PUBL(callout) != NULL)
{
PUBL(callout_block) cb;
cb.version = 1;
cb.callout_number = code[1];
cb.offset_vector = offsets;
#if defined COMPILE_PCRE8
cb.subject = (PCRE_SPTR)start_subject;
#elif defined COMPILE_PCRE16
cb.subject = (PCRE_SPTR16)start_subject;
#elif defined COMPILE_PCRE32
cb.subject = (PCRE_SPTR32)start_subject;
#endif
cb.subject_length = (int)(end_subject - start_subject);
cb.start_match = (int)(current_subject - start_subject);
cb.current_position = (int)(ptr - start_subject);
cb.pattern_position = GET(code, 2);
cb.next_item_length = GET(code, 2 + LINK_SIZE);
cb.capture_top = 1;
cb.capture_last = -1;
cb.callout_data = md->callout_data;
cb.mark = NULL;
if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;
}
if (rrc == 0)
{ ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
break;
default:
return PCRE_ERROR_DFA_UITEM;
}
NEXT_ACTIVE_STATE: continue;
}
if (new_count <= 0)
{
if (rlevel == 1 &&
could_continue &&
forced_fail != workspace[1] &&
(
(md->moptions & PCRE_PARTIAL_HARD) != 0
||
((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&
match_count < 0)
) &&
(
partial_newline ||
(
ptr >= end_subject &&
ptr > md->start_used_ptr)
)
)
match_count = PCRE_ERROR_PARTIAL;
DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
"%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
rlevel*2-2, SP));
break;
}
ptr += clen;
}
return match_count;
}
/*************************************************
*  Match a compiled pattern with the DFA matcher *
*************************************************/

/* External entry point for DFA matching. The same body is compiled under one
of three names (pcre_dfa_exec, pcre16_dfa_exec, pcre32_dfa_exec) depending on
which COMPILE_PCREn macro is defined. The function validates its arguments,
fills in a dfa_match_data block, and then calls internal_dfa_exec() at
successive start positions in the subject until that call returns something
other than PCRE_ERROR_NOMATCH, the match is anchored, or no further start
position is available.

Arguments:
  argument_re   the compiled pattern
  extra_data    optional extra block; may supply study data, callout data and
                  character tables (either match-limit flag is rejected)
  subject       the subject string
  length        length of the subject, in code units
  start_offset  offset in the subject at which to start matching
  options       match-time option bits
  offsets       vector for returned offsets; may be NULL if offsetcount is 0
  offsetcount   number of ints in offsets
  workspace     workspace vector for the matcher
  wscount       number of ints in workspace; must be at least 20

Returns:        the value returned by internal_dfa_exec() for the start
                  position that ended the search, PCRE_ERROR_NOMATCH if the
                  search loop ran out of start positions, or a negative
                  PCRE_ERROR_xxx code if an argument check fails
*/
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
const char *subject, int length, int start_offset, int options, int *offsets,
int offsetcount, int *workspace, int wscount)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
int offsetcount, int *workspace, int wscount)
#elif defined COMPILE_PCRE32
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre32_dfa_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
int offsetcount, int *workspace, int wscount)
#endif
{
/* NOTE: the local names "md" and "utf" must not be renamed. NLBLOCK is
defined as "md" at the top of this file, and the IS_NEWLINE/WAS_NEWLINE macros
used below are expanded in terms of it (and presumably of "utf" as well --
confirm in pcre_internal.h). */
REAL_PCRE *re = (REAL_PCRE *)argument_re;
dfa_match_data match_block;
dfa_match_data *md = &match_block;
BOOL utf, anchored, startline, firstline;
const pcre_uchar *current_subject, *end_subject;
const pcre_study_data *study = NULL;
const pcre_uchar *req_char_ptr;
const pcre_uint8 *start_bits = NULL;
BOOL has_first_char = FALSE;
BOOL has_req_char = FALSE;
pcre_uchar first_char = 0;
pcre_uchar first_char2 = 0;
pcre_uchar req_char = 0;
pcre_uchar req_char2 = 0;
int newline;
/* Plausibility checks on the arguments. Each failing check has its own error
code. A NULL offsets vector is acceptable only when offsetcount is zero. */
if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
if (re == NULL || subject == NULL || workspace == NULL ||
(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
if (length < 0) return PCRE_ERROR_BADLENGTH;
if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
/* The compiled pattern must carry the expected magic number. The reversed
magic number is reported separately, as an endianness error. */
if (re->magic_number != MAGIC_NUMBER)
return re->magic_number == REVERSED_MAGIC_NUMBER?
PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
/* The pattern's flags must include PCRE_MODE (presumably: the pattern was
compiled by the library of this code-unit width -- confirm against
pcre_internal.h). */
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
/* When restarting, sanity-check the first two workspace entries before they
are used: workspace[0] may only be 0 or 1, and workspace[1] must be at least 1
and no greater than the number of stateblocks that fit after the two leading
ints. (Presumably these were left by the previous call that returned a partial
match -- confirm against internal_dfa_exec.) */
if ((options & PCRE_DFA_RESTART) != 0)
{
if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
return PCRE_ERROR_DFA_BADRESTART;
}
/* Start with the tables held in the compiled pattern and no callout data. An
extra_data block may override both and may supply study data. Match limits are
not supported by this function: either limit flag causes an immediate
PCRE_ERROR_DFA_UMLIMIT return. */
md->tables = re->tables;
md->callout_data = NULL;
if (extra_data != NULL)
{
unsigned long int flags = extra_data->flags;
if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
study = (const pcre_study_data *)extra_data->study_data;
if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
return PCRE_ERROR_DFA_UMLIMIT;
if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
md->callout_data = extra_data->callout_data;
if ((flags & PCRE_EXTRA_TABLES) != 0)
md->tables = extra_data->tables;
}
/* Local subject pointers. req_char_ptr records where the required code unit
was last found by the search in the main loop; it starts one position before
the first candidate so that the first search is always performed. */
current_subject = (const pcre_uchar *)subject + start_offset;
end_subject = (const pcre_uchar *)subject + length;
req_char_ptr = current_subject - 1;
/* UTF mode is taken from the compiled pattern's options. The PCRE_UTF8 bit is
the one tested in all three library widths. */
#ifdef SUPPORT_UTF
utf = (re->options & PCRE_UTF8) != 0;
#else
utf = FALSE;
#endif
/* The match is anchored if that was requested at match time or at compile
time. A restart is also treated as anchored. */
anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
(re->options & PCRE_ANCHORED) != 0;
/* The remaining fixed data passed to internal_dfa_exec(). The compiled code
is located immediately after the name table within the pattern block. */
md->start_code = (const pcre_uchar *)argument_re +
re->name_table_offset + re->name_count * re->name_entry_size;
md->start_subject = (const pcre_uchar *)subject;
md->end_subject = end_subject;
md->start_offset = start_offset;
md->moptions = options;
md->poptions = re->options;
/* If no BSR option was given at match time, inherit whatever was set at
compile time; failing that, PCRE_BSR_ANYCRLF is applied when the build defines
BSR_ANYCRLF. */
if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
{
if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
#ifdef BSR_ANYCRLF
else md->moptions |= PCRE_BSR_ANYCRLF;
#endif
}
/* Select the newline convention. Newline bits given at match time take
precedence; otherwise the compile-time bits are used, and if those are zero as
well the build default NEWLINE applies. The two-character CRLF case is packed
as (CR << 8) | NL; -1 and -2 are sentinels for the "any" and "anycrlf" types.
Any other bit combination is an error. */
switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
PCRE_NEWLINE_BITS)
{
case 0: newline = NEWLINE; break;
case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
case PCRE_NEWLINE_CR+
PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
case PCRE_NEWLINE_ANY: newline = -1; break;
case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
default: return PCRE_ERROR_BADNEWLINE;
}
/* Record the newline choice in the match block. For a fixed newline, a value
above 255 is the packed two-character form and is split into nl[0] and nl[1].
Note that nllen and nl[] are set here only for the fixed type. */
if (newline == -2)
{
md->nltype = NLTYPE_ANYCRLF;
}
else if (newline < 0)
{
md->nltype = NLTYPE_ANY;
}
else
{
md->nltype = NLTYPE_FIXED;
if (newline > 255)
{
md->nllen = 2;
md->nl[0] = (newline >> 8) & 255;
md->nl[1] = newline & 255;
}
else
{
md->nllen = 1;
md->nl[0] = newline;
}
}
/* In UTF mode, validate the subject unless PCRE_NO_UTF8_CHECK is set. On
failure the error offset and the error code are passed back in the first two
slots of the offsets vector, provided it has room for them. */
#ifdef SUPPORT_UTF
if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
{
int erroroffset;
int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
if (errorcode != 0)
{
if (offsetcount >= 2)
{
offsets[0] = erroroffset;
offsets[1] = errorcode;
}
/* In the 8-bit and 16-bit libraries, error codes up to PCRE_UTF8_ERR5 or
PCRE_UTF16_ERR1 (presumably the truncated-character errors -- confirm) are
reported as a "short" string when PCRE_PARTIAL_HARD is set, rather than as a
bad string. The 32-bit library has a single error code. */
#if defined COMPILE_PCRE8
return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0) ?
PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
#elif defined COMPILE_PCRE16
return (errorcode <= PCRE_UTF16_ERR1 && (options & PCRE_PARTIAL_HARD) != 0) ?
PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
#elif defined COMPILE_PCRE32
return PCRE_ERROR_BADUTF32;
#endif
}
/* A start offset strictly inside the subject must not land on a code unit for
which NOT_FIRSTCHAR is true (presumably a non-initial unit of a multi-unit
character). This check applies to the 8-bit and 16-bit libraries only. */
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
if (start_offset > 0 && start_offset < length &&
NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
return PCRE_ERROR_BADUTF8_OFFSET;
#endif
}
#endif
/* If neither the pattern nor extra_data supplied tables, use the built-in
default tables. */
if (md->tables == NULL) md->tables = PRIV(default_tables);
/* These two flags are consulted in the main loop when choosing where to
start: startline comes from the pattern's flags, firstline from its options. */
startline = (re->flags & PCRE_STARTLINE) != 0;
firstline = (re->options & PCRE_FIRSTLINE) != 0;
/* Set up the starting-code-unit optimization, which applies only to
unanchored matches. If the pattern records a first code unit, remember it
and, for a caseless first unit, its other case as well (taken from the fcc
table, or from UCD_OTHERCASE for values above 127 in the wider UTF builds with
UCP support). Otherwise, if the pattern is not a startline pattern and the
study data contains a start bitmap, use the bitmap. */
if (!anchored)
{
if ((re->flags & PCRE_FIRSTSET) != 0)
{
has_first_char = TRUE;
first_char = first_char2 = (pcre_uchar)(re->first_char);
if ((re->flags & PCRE_FCH_CASELESS) != 0)
{
first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
if (utf && first_char > 127)
first_char2 = UCD_OTHERCASE(first_char);
#endif
}
}
else
{
if (!startline && study != NULL &&
(study->flags & PCRE_STUDY_MAPPED) != 0)
start_bits = study->start_bits;
}
}
/* Whether anchored or not, the pattern may record a required code unit; set
it up in the same way as the first code unit, including the other case. */
if ((re->flags & PCRE_REQCHSET) != 0)
{
has_req_char = TRUE;
req_char = req_char2 = (pcre_uchar)(re->req_char);
if ((re->flags & PCRE_RCH_CASELESS) != 0)
{
req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
if (utf && req_char > 127)
req_char2 = UCD_OTHERCASE(req_char);
#endif
}
}
/* Main "bumpalong" loop: call the matcher, and after a failed match at one
start position move on to the next, unless anchored. The start optimizations
in the first part of the loop body are skipped when restarting. */
for (;;)
{
int rc;
if ((options & PCRE_DFA_RESTART) == 0)
{
const pcre_uchar *save_end_subject = end_subject;
/* With firstline set, temporarily move end_subject back to the first newline
at or after the current position, so that the start-point scans below stop
there. The real end is restored once those scans are done. */
if (firstline)
{
PCRE_PUCHAR t = current_subject;
#ifdef SUPPORT_UTF
if (utf)
{
while (t < md->end_subject && !IS_NEWLINE(t))
{
t++;
ACROSSCHAR(t < end_subject, *t, t++);
}
}
else
#endif
while (t < md->end_subject && !IS_NEWLINE(t)) t++;
end_subject = t;
}
/* Start-point optimizations, disabled if PCRE_NO_START_OPTIMIZE is set either
in the match-time options or in the pattern's options. At most one of the
three scans below is used. */
if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
{
/* Known first code unit: advance to its next occurrence (either case when the
two cases differ), or to end_subject if there is none. */
if (has_first_char)
{
if (first_char != first_char2)
{
pcre_uchar csc;
while (current_subject < end_subject &&
(csc = UCHAR21TEST(current_subject)) != first_char && csc != first_char2)
current_subject++;
}
else
while (current_subject < end_subject &&
UCHAR21TEST(current_subject) != first_char)
current_subject++;
}
/* Startline pattern: except at the very first start position, advance until
WAS_NEWLINE is true for the current position (presumably: until just after a
newline). */
else if (startline)
{
if (current_subject > md->start_subject + start_offset)
{
#ifdef SUPPORT_UTF
if (utf)
{
while (current_subject < end_subject &&
!WAS_NEWLINE(current_subject))
{
current_subject++;
ACROSSCHAR(current_subject < end_subject, *current_subject,
current_subject++);
}
}
else
#endif
while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
current_subject++;
/* If the previous code unit is CR, the current one is LF, and the newline
type is ANY or ANYCRLF, step over the LF as well so the start position is
after the whole CRLF pair. */
if (UCHAR21TEST(current_subject - 1) == CHAR_CR &&
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
current_subject < end_subject &&
UCHAR21TEST(current_subject) == CHAR_NL)
current_subject++;
}
}
/* Start bitmap from study data: advance until a code unit whose bit is set in
the bitmap. In the 16-bit and 32-bit libraries every value above 255 is
looked up as 255. */
else if (start_bits != NULL)
{
while (current_subject < end_subject)
{
register pcre_uint32 c = UCHAR21TEST(current_subject);
#ifndef COMPILE_PCRE8
if (c > 255) c = 255;
#endif
if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
current_subject++;
}
}
}
/* Restore the end pointer that firstline handling may have altered. */
end_subject = save_end_subject;
/* The next two optimizations are additionally disabled for partial matching
(either PCRE_PARTIAL_HARD or PCRE_PARTIAL_SOFT). */
if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
(options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
{
/* If study data gives a minimum length and fewer code units than that remain,
no match is possible from here on, so give up at once. The comparison is made
in code units. */
if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
(pcre_uint32)(end_subject - current_subject) < study->minlength)
return PCRE_ERROR_NOMATCH;
/* Required code unit: look for it at or after the start position (after the
first code unit when one is set). The search is skipped when the remaining
subject is REQ_BYTE_MAX or longer, and also when the current position has not
yet passed the place where it was found last time. */
if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
{
register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
if (p > req_char_ptr)
{
/* Separate loops for the two-case and single-case searches. Each reads with
an auto-incrementing fetch and backs up by one on a hit, so that p is left
pointing at the code unit that was found. */
if (req_char != req_char2)
{
while (p < end_subject)
{
register pcre_uint32 pp = UCHAR21INCTEST(p);
if (pp == req_char || pp == req_char2) { p--; break; }
}
}
else
{
while (p < end_subject)
{
if (UCHAR21INCTEST(p) == req_char) { p--; break; }
}
}
/* Not found: leave the bumpalong loop, which leads to the final
PCRE_ERROR_NOMATCH return. Otherwise remember where it was found. */
if (p >= end_subject) break;
req_char_ptr = p;
}
}
}
}
/* Run the matcher at this start position. The last argument is the recursion
level, 0 for this top-level call. */
md->start_used_ptr = current_subject;
md->recursive = NULL;
rc = internal_dfa_exec(
md,
md->start_code,
current_subject,
start_offset,
offsets,
offsetcount,
workspace,
wscount,
0);
/* Any result other than "no match" ends the search, and so does "no match"
when anchored. For a partial match the offsets vector is overwritten, space
permitting: [0] is the offset of md->start_used_ptr, [1] is the end of the
subject, and [2] is the start position of this match attempt. */
if (rc != PCRE_ERROR_NOMATCH || anchored)
{
if (rc == PCRE_ERROR_PARTIAL && offsetcount >= 2)
{
offsets[0] = (int)(md->start_used_ptr - (PCRE_PUCHAR)subject);
offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
if (offsetcount > 2)
offsets[2] = (int)(current_subject - (PCRE_PUCHAR)subject);
}
return rc;
}
/* Advance to the next start position, unless firstline is set and the current
position is at a newline, in which case the search is over. In UTF mode
ACROSSCHAR is used to move over the remaining code units of the character
(presumably the non-initial ones -- confirm in pcre_internal.h). */
if (firstline && IS_NEWLINE(current_subject)) break;
current_subject++;
#ifdef SUPPORT_UTF
if (utf)
{
ACROSSCHAR(current_subject < end_subject, *current_subject,
current_subject++);
}
#endif
/* The test is strictly "greater than": a start position equal to end_subject
is still tried. */
if (current_subject > end_subject) break;
/* If the previous code unit is CR and the current one is LF, the pattern's
PCRE_HASCRORLF flag is not set, and the newline type is ANY, ANYCRLF, or a
two-character fixed newline, skip the LF too so that no match is attempted in
the middle of the CRLF pair. */
if (UCHAR21TEST(current_subject - 1) == CHAR_CR &&
current_subject < end_subject &&
UCHAR21TEST(current_subject) == CHAR_NL &&
(re->flags & PCRE_HASCRORLF) == 0 &&
(md->nltype == NLTYPE_ANY ||
md->nltype == NLTYPE_ANYCRLF ||
md->nllen == 2))
current_subject++;
}
/* Reached only by breaking out of the bumpalong loop. */
return PCRE_ERROR_NOMATCH;
}