#include "postgres.h"
#include "gramparse.h"
#include "mb/pg_wchar.h"
#include "parser/parser.h"
#include "parser/scansup.h"
/* Forward declarations for static helpers that are defined later in this file */
static bool check_uescapechar(unsigned char escape);
static char *str_udeescape(const char *str, char escape,
int position, core_yyscan_t yyscanner);
/*
 * raw_parser
 *		Run the scanner and the grammar over a source string.
 *
 * str is the text to parse and mode selects how parsing starts.  For every
 * mode other than RAW_PARSE_DEFAULT, the token associated with that mode is
 * preloaded into the lookahead slot of the parser state (with location 0 and
 * no hold-character position) before the grammar is started.
 *
 * Returns NIL when base_yyparse() reports a nonzero status, otherwise the
 * parsetree field of the parser state.
 */
List *
raw_parser(const char *str, RawParseMode mode)
{
	/* first token to hand to the grammar, indexed by RawParseMode */
	static const int mode_token[] = {
		[RAW_PARSE_DEFAULT] = 0,
		[RAW_PARSE_TYPE_NAME] = MODE_TYPE_NAME,
		[RAW_PARSE_PLPGSQL_EXPR] = MODE_PLPGSQL_EXPR,
		[RAW_PARSE_PLPGSQL_ASSIGN1] = MODE_PLPGSQL_ASSIGN1,
		[RAW_PARSE_PLPGSQL_ASSIGN2] = MODE_PLPGSQL_ASSIGN2,
		[RAW_PARSE_PLPGSQL_ASSIGN3] = MODE_PLPGSQL_ASSIGN3,
	};
	base_yy_extra_type yyextra;
	core_yyscan_t yyscanner;
	int			parse_status;

	/* set up the scanner over the input string */
	yyscanner = scanner_init(str, &yyextra.core_yy_extra,
							 &ScanKeywords, ScanKeywordTokens);

	/* only the lookahead fields need initializing for base_yylex() */
	yyextra.have_lookahead = (mode != RAW_PARSE_DEFAULT);
	if (yyextra.have_lookahead)
	{
		yyextra.lookahead_token = mode_token[mode];
		yyextra.lookahead_yylloc = 0;
		yyextra.lookahead_end = NULL;
	}

	/* set up the grammar state, run the parse, then tear the scanner down */
	parser_init(&yyextra);
	parse_status = base_yyparse(yyscanner);
	scanner_finish(yyscanner);

	return (parse_status != 0) ? NIL : yyextra.parsetree;
}
/*
 * base_yylex
 *		Token filter sitting between core_yylex() and the grammar.
 *
 * Returns the next token code, storing its semantic value in *lvalp and its
 * location in *llocp.  The token is taken from the saved lookahead slot in
 * yyextra when have_lookahead is set, otherwise from core_yylex().
 *
 * For FORMAT, NOT, NULLS_P, WITH and WITHOUT one more token is read ahead and
 * the current token is replaced by FORMAT_LA, NOT_LA, NULLS_LA, WITH_LA or
 * WITHOUT_LA when the following token is one of the listed keywords.  UIDENT
 * and USCONST have their Unicode escapes decoded by str_udeescape(), using
 * the escape character from a following UESCAPE clause if there is one, and
 * are returned as IDENT and SCONST respectively.  SQL_COMMENT and C_COMMENT
 * tokens are skipped.  Every other token is returned unchanged.
 */
int
base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
{
base_yy_extra_type *yyextra = pg_yyget_extra(yyscanner);
int cur_token;
int next_token;
int cur_token_length;
YYLTYPE cur_yylloc;
/* Fetch the current token, reusing a previously saved lookahead if any */
if (yyextra->have_lookahead)
{
cur_token = yyextra->lookahead_token;
lvalp->core_yystype = yyextra->lookahead_yylval;
*llocp = yyextra->lookahead_yylloc;
/* Put back the character that was overwritten with '\0' when saving */
if (yyextra->lookahead_end)
*(yyextra->lookahead_end) = yyextra->lookahead_hold_char;
yyextra->have_lookahead = false;
}
else
cur_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
/*
 * Decide whether this token needs lookahead.  If it does, record the length
 * of its text so that the position just past it can be computed below;
 * otherwise return right away.
 */
switch (cur_token)
{
case FORMAT:
cur_token_length = 6;
break;
case NOT:
cur_token_length = 3;
break;
case NULLS_P:
cur_token_length = 5;
break;
case WITH:
cur_token_length = 4;
break;
case UIDENT:
case USCONST:
/* Variable-length token: measure up to the next NUL in scanbuf */
cur_token_length = strlen(yyextra->core_yy_extra.scanbuf + *llocp);
break;
case WITHOUT:
cur_token_length = 7;
break;
case SQL_COMMENT:
case C_COMMENT:
/* Comment tokens are not passed on; fetch the token after them instead */
return base_yylex(lvalp, llocp, yyscanner);
default:
return cur_token;
}
/*
 * lookahead_end addresses the byte just past the current token.  The Assert
 * checks that scanbuf holds a '\0' there at this point.
 */
yyextra->lookahead_end = yyextra->core_yy_extra.scanbuf +
*llocp + cur_token_length;
Assert(*(yyextra->lookahead_end) == '\0');
/*
 * Read the following token into the lookahead slot.  core_yylex() writes the
 * new location through llocp, so the current token's location is saved
 * first and put back afterwards.
 */
cur_yylloc = *llocp;
next_token = core_yylex(&(yyextra->lookahead_yylval), llocp, yyscanner);
yyextra->lookahead_token = next_token;
yyextra->lookahead_yylloc = *llocp;
*llocp = cur_yylloc;
/*
 * Terminate the current token with '\0' again, remembering the character
 * that was there; it is restored when the saved lookahead is consumed.
 */
yyextra->lookahead_hold_char = *(yyextra->lookahead_end);
*(yyextra->lookahead_end) = '\0';
yyextra->have_lookahead = true;
/* Replace cur_token where the lookahead token calls for it */
switch (cur_token)
{
case FORMAT:
/* FORMAT followed by JSON becomes FORMAT_LA */
switch (next_token)
{
case JSON:
cur_token = FORMAT_LA;
break;
}
break;
case NOT:
/* NOT followed by BETWEEN, IN, LIKE, ILIKE or SIMILAR becomes NOT_LA */
switch (next_token)
{
case BETWEEN:
case IN_P:
case LIKE:
case ILIKE:
case SIMILAR:
cur_token = NOT_LA;
break;
}
break;
case NULLS_P:
/* NULLS followed by FIRST or LAST becomes NULLS_LA */
switch (next_token)
{
case FIRST_P:
case LAST_P:
cur_token = NULLS_LA;
break;
}
break;
case WITH:
/* WITH followed by TIME or ORDINALITY becomes WITH_LA */
switch (next_token)
{
case TIME:
case ORDINALITY:
cur_token = WITH_LA;
break;
}
break;
case WITHOUT:
/* WITHOUT followed by TIME becomes WITHOUT_LA */
switch (next_token)
{
case TIME:
cur_token = WITHOUT_LA;
break;
}
break;
case UIDENT:
case USCONST:
/* Check for a trailing UESCAPE clause naming the escape character */
if (next_token == UESCAPE)
{
const char *escstr;
/* Save the location again; the third token's scan overwrites *llocp */
cur_yylloc = *llocp;
/* Undo the '\0' written above before scanning the third token */
*(yyextra->lookahead_end) = yyextra->lookahead_hold_char;
/* The third token overwrites the UESCAPE token in the lookahead slot */
next_token = core_yylex(&(yyextra->lookahead_yylval),
llocp, yyscanner);
/* These errors are raised while *llocp still refers to the third token */
if (next_token != SCONST)
scanner_yyerror("UESCAPE must be followed by a simple string literal",
yyscanner);
escstr = yyextra->lookahead_yylval.str;
if (strlen(escstr) != 1 || !check_uescapechar(escstr[0]))
scanner_yyerror("invalid Unicode escape character",
yyscanner);
/* Back to the first token's location for the conversion below */
*llocp = cur_yylloc;
lvalp->core_yystype.str =
str_udeescape(lvalp->core_yystype.str,
escstr[0],
*llocp,
yyscanner);
/* All three tokens are consumed, so no lookahead remains pending */
yyextra->have_lookahead = false;
}
else
{
/* No UESCAPE clause: decode with backslash as the escape character */
lvalp->core_yystype.str =
str_udeescape(lvalp->core_yystype.str,
'\\',
*llocp,
yyscanner);
}
if (cur_token == UIDENT)
{
/* Identifiers go through truncate_identifier() after decoding */
truncate_identifier(lvalp->core_yystype.str,
strlen(lvalp->core_yystype.str),
true);
cur_token = IDENT;
}
else if (cur_token == USCONST)
{
cur_token = SCONST;
}
break;
}
return cur_token;
}
/*
 * hexval
 *		Numeric value (0..15) of a single hexadecimal digit character.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'.  Any other character is reported
 * with elog(ERROR).
 */
static unsigned int
hexval(unsigned char c)
{
	unsigned int digit;

	if (c >= '0' && c <= '9')
		digit = c - '0';
	else if (c >= 'a' && c <= 'f')
		digit = c - 'a' + 0xA;
	else if (c >= 'A' && c <= 'F')
		digit = c - 'A' + 0xA;
	else
	{
		elog(ERROR, "invalid hexadecimal digit");
		digit = 0;				/* only here to give digit a value */
	}

	return digit;
}
/*
 * check_unicode_value
 *		Report a syntax error unless c passes is_valid_unicode_codepoint().
 */
static void
check_unicode_value(pg_wchar c)
{
	if (is_valid_unicode_codepoint(c))
		return;

	ereport(ERROR,
			(errcode(ERRCODE_SYNTAX_ERROR),
			 errmsg("invalid Unicode escape value")));
}
/*
 * check_uescapechar
 *		Is "escape" acceptable as the escape character of a UESCAPE clause?
 *
 * Returns false for hexadecimal digits, '+', single quote, double quote and
 * anything scanner_isspace() accepts; true for every other character.
 */
static bool
check_uescapechar(unsigned char escape)
{
	if (isxdigit(escape))
		return false;

	switch (escape)
	{
		case '+':
		case '\'':
		case '"':
			return false;
		default:
			break;
	}

	return !scanner_isspace(escape);
}
static char *
str_udeescape(const char *str, char escape,
int position, core_yyscan_t yyscanner)
{
const char *in;
char *new,
*out;
size_t new_len;
pg_wchar pair_first = 0;
ScannerCallbackState scbstate;
new_len = strlen(str) + MAX_UNICODE_EQUIVALENT_STRING + 1;
new = palloc(new_len);
in = str;
out = new;
while (*in)
{
size_t out_dist = out - new;
if (out_dist > new_len - (MAX_UNICODE_EQUIVALENT_STRING + 1))
{
new_len *= 2;
new = repalloc(new, new_len);
out = new + out_dist;
}
if (in[0] == escape)
{
setup_scanner_errposition_callback(&scbstate, yyscanner,
in - str + position + 3);
if (in[1] == escape)
{
if (pair_first)
goto invalid_pair;
*out++ = escape;
in += 2;
}
else if (isxdigit((unsigned char) in[1]) &&
isxdigit((unsigned char) in[2]) &&
isxdigit((unsigned char) in[3]) &&
isxdigit((unsigned char) in[4]))
{
pg_wchar unicode;
unicode = (hexval(in[1]) << 12) +
(hexval(in[2]) << 8) +
(hexval(in[3]) << 4) +
hexval(in[4]);
check_unicode_value(unicode);
if (pair_first)
{
if (is_utf16_surrogate_second(unicode))
{
unicode = surrogate_pair_to_codepoint(pair_first, unicode);
pair_first = 0;
}
else
goto invalid_pair;
}
else if (is_utf16_surrogate_second(unicode))
goto invalid_pair;
if (is_utf16_surrogate_first(unicode))
pair_first = unicode;
else
{
pg_unicode_to_server(unicode, (unsigned char *) out);
out += strlen(out);
}
in += 5;
}
else if (in[1] == '+' &&
isxdigit((unsigned char) in[2]) &&
isxdigit((unsigned char) in[3]) &&
isxdigit((unsigned char) in[4]) &&
isxdigit((unsigned char) in[5]) &&
isxdigit((unsigned char) in[6]) &&
isxdigit((unsigned char) in[7]))
{
pg_wchar unicode;
unicode = (hexval(in[2]) << 20) +
(hexval(in[3]) << 16) +
(hexval(in[4]) << 12) +
(hexval(in[5]) << 8) +
(hexval(in[6]) << 4) +
hexval(in[7]);
check_unicode_value(unicode);
if (pair_first)
{
if (is_utf16_surrogate_second(unicode))
{
unicode = surrogate_pair_to_codepoint(pair_first, unicode);
pair_first = 0;
}
else
goto invalid_pair;
}
else if (is_utf16_surrogate_second(unicode))
goto invalid_pair;
if (is_utf16_surrogate_first(unicode))
pair_first = unicode;
else
{
pg_unicode_to_server(unicode, (unsigned char *) out);
out += strlen(out);
}
in += 8;
}
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("invalid Unicode escape"),
errhint("Unicode escapes must be \\XXXX or \\+XXXXXX.")));
cancel_scanner_errposition_callback(&scbstate);
}
else
{
if (pair_first)
goto invalid_pair;
*out++ = *in++;
}
}
if (pair_first)
goto invalid_pair;
*out = '\0';
return new;
invalid_pair:
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("invalid Unicode surrogate pair"),
scanner_errposition(in - str + position + 3,
yyscanner)));
return NULL;
}