#include "pg_query.h"
#include "pg_query_internal.h"
#include "pg_query_fingerprint.h"
#include "parser/parser.h"
#include "parser/scanner.h"
#include "parser/scansup.h"
#include "mb/pg_wchar.h"
#include "nodes/nodeFuncs.h"
#include "pg_query_outfuncs.h"
/*
 * One recorded constant in the query text that is to be replaced by a
 * parameter symbol.
 */
typedef struct pgssLocationLen
{
	int			location;		/* byte offset of the constant in the query text */
	int			length;			/* byte length of the constant; -1 until it has
								 * been determined */
	int			param_id;		/* parameter number to emit: a positive value is
								 * used as-is, a negative value is an ordinal
								 * that is offset by the highest external
								 * parameter number when the query is written */
} pgssLocationLen;
/*
 * Working state shared by the tree walker and the query rewriter while a
 * single query string is being normalized.
 */
typedef struct pgssConstLocations
{
	/* Growable array of recorded constants */
	pgssLocationLen *clocations;

	/* Allocated capacity of clocations, in entries */
	int			clocations_buf_size;

	/* Number of entries of clocations currently in use */
	int			clocations_count;

	/* Ordinal that the next newly recorded constant will receive */
	int			highest_normalize_param_id;

	/* Largest parameter number seen in a ParamRef node so far */
	int			highest_extern_param_id;

	/* Query text being normalized, and its length in bytes */
	const char *query;
	int			query_len;

	/*
	 * When non-NULL, every parameter id that is recorded or encountered is
	 * also appended to this growable array.
	 */
	int		   *param_refs;
	int			param_refs_buf_size;
	int			param_refs_count;

	/* When true, the walker does not descend into SELECT/DML statements */
	bool		normalize_utility_only;
} pgssConstLocations;
/*
 * Fingerprint of an expression together with the parameter ids that were
 * recorded while walking it.
 */
typedef struct FpAndParamRefs
{
	uint64_t	fp;					/* fingerprint of the expression */
	int		   *param_refs;			/* parameter ids recorded for the expression */
	int			param_refs_count;	/* number of entries in param_refs */
} FpAndParamRefs;
static int
comp_location(const void *a, const void *b)
{
int l = ((const pgssLocationLen *) a)->location;
int r = ((const pgssLocationLen *) b)->location;
if (l < r)
return -1;
else if (l > r)
return +1;
else
return 0;
}
/*
 * Determine the byte length of every recorded constant by re-lexing the
 * query text.
 *
 * The recorded entries are first sorted by location. Entries whose length
 * cannot be determined (duplicates of an earlier location, or anything left
 * over once the lexer reports end of input) keep the length they had on
 * entry.
 */
static void
fill_in_constant_lengths(pgssConstLocations *jstate, const char *query)
{
	pgssLocationLen *entries;
	core_yyscan_t scanner;
	core_yy_extra_type extra;
	core_YYSTYPE lval;
	YYLTYPE		lloc;
	int			prev_loc = -1;
	int			idx;

	/* Process constants in the order in which they appear in the text */
	if (jstate->clocations_count > 1)
		qsort(jstate->clocations, jstate->clocations_count,
			  sizeof(pgssLocationLen), comp_location);
	entries = jstate->clocations;

	scanner = scanner_init(query,
						   &extra,
						   &ScanKeywords,
						   ScanKeywordTokens);

	for (idx = 0; idx < jstate->clocations_count; idx++)
	{
		int			loc = entries[idx].location;
		int			tok;
		int			len;
		const char *text;

		Assert(loc >= 0);

		/* Same location as an entry already handled: leave it alone */
		if (loc <= prev_loc)
			continue;

		/* Lex forward until a token at or beyond the wanted location */
		do
		{
			tok = core_yylex(&lval, &lloc, scanner);
		} while (tok != 0 && lloc < loc);

		/*
		 * A leading '-' at the recorded location means the constant spans two
		 * tokens; consume the one that follows the sign as well.
		 */
		if (tok != 0 && query[loc] == '-')
			tok = core_yylex(&lval, &lloc, scanner);

		/* End of input: give up and leave the remaining lengths untouched */
		if (tok == 0)
			break;

		/*
		 * The constant is taken to extend from loc up to the next zero byte
		 * in the scanner's buffer.
		 */
		text = extra.scanbuf + loc;
		len = (int) strlen(text);

		/*
		 * For a U&'...' literal, drop any trailing whitespace from the
		 * measured length.
		 */
		if (len > 4 &&
			(text[0] == 'u' || text[0] == 'U') &&
			text[1] == '&' && text[2] == '\'')
		{
			int			last = len - 1;

			while (last >= 0 && scanner_isspace(text[last]))
				last--;
			len = last + 1;
		}

		entries[idx].length = len;
		prev_loc = loc;
	}

	scanner_finish(scanner);
}
/*
 * Build a copy of jstate->query in which every recorded constant with a
 * known length is replaced by a "$n" parameter symbol.
 *
 * query_loc is subtracted from each recorded location before it is used as
 * an offset into the query text. On entry *query_len_p is the length of the
 * query text; on return it holds the length of the result. The encoding
 * argument is not used.
 *
 * The result is a palloc'd, NUL-terminated string.
 */
static char *
generate_normalized_query(pgssConstLocations *jstate, int query_loc, int* query_len_p, int encoding)
{
	const char *src = jstate->query;
	int			src_len = *query_len_p;
	int			src_pos = 0;	/* next unread byte of the source text */
	int			dst_pos = 0;	/* next free byte of the output buffer */
	int			dst_cap;
	int			chunk;
	char	   *dst;
	int			idx;

	/* Sorts the entries by location and fills in their lengths */
	fill_in_constant_lengths(jstate, src);

	/*
	 * A constant occupies at least one byte and a "$n" symbol at most eleven,
	 * so ten extra bytes per constant are sufficient.
	 */
	dst_cap = src_len + jstate->clocations_count * 10;
	dst = palloc(dst_cap + 1);

	for (idx = 0; idx < jstate->clocations_count; idx++)
	{
		const pgssLocationLen *entry = &jstate->clocations[idx];
		int			start = entry->location - query_loc;
		int			id;

		/* Entries without a length are not replaced */
		if (entry->length < 0)
			continue;

		/*
		 * Copy the text between the previous constant and this one. src_pos
		 * always equals the end offset of the previously replaced constant
		 * (or 0 if there is none).
		 */
		chunk = start - src_pos;
		Assert(chunk >= 0);
		memcpy(dst + dst_pos, src + src_pos, chunk);
		dst_pos += chunk;

		/* Negative ids are numbered after the highest external parameter */
		if (entry->param_id < 0)
			id = jstate->highest_extern_param_id + abs(entry->param_id);
		else
			id = entry->param_id;
		dst_pos += sprintf(dst + dst_pos, "$%d", id);

		/* Skip over the constant in the source text */
		src_pos = start + entry->length;
	}

	/* Copy whatever follows the last replaced constant */
	chunk = src_len - src_pos;
	Assert(chunk >= 0);
	memcpy(dst + dst_pos, src + src_pos, chunk);
	dst_pos += chunk;

	Assert(dst_pos <= dst_cap);
	dst[dst_pos] = '\0';

	*query_len_p = dst_pos;
	return dst;
}
/*
 * Append a constant at the given query offset to jstate->clocations.
 *
 * Negative locations are ignored. The new entry gets length -1 and the next
 * normalization ordinal, stored negated in param_id. If jstate->param_refs
 * is set, that param_id is appended to it as well.
 */
static void RecordConstLocation(pgssConstLocations *jstate, int location)
{
	pgssLocationLen *slot;

	if (location < 0)
		return;

	/* Double the array when it is full */
	if (jstate->clocations_count >= jstate->clocations_buf_size)
	{
		jstate->clocations_buf_size *= 2;
		jstate->clocations = (pgssLocationLen *)
			repalloc(jstate->clocations,
					 jstate->clocations_buf_size * sizeof(pgssLocationLen));
	}

	/* Take the slot address only after the array may have moved */
	slot = &jstate->clocations[jstate->clocations_count];
	slot->location = location;
	slot->length = -1;
	slot->param_id = -jstate->highest_normalize_param_id;
	jstate->highest_normalize_param_id++;

	if (jstate->param_refs != NULL)
	{
		jstate->param_refs[jstate->param_refs_count++] = slot->param_id;

		/* Grow after writing so that there is always room for the next one */
		if (jstate->param_refs_count >= jstate->param_refs_buf_size)
		{
			jstate->param_refs_buf_size *= 2;
			jstate->param_refs = (int *) repalloc(jstate->param_refs,
												  jstate->param_refs_buf_size * sizeof(int));
		}
	}

	jstate->clocations_count++;
}
/*
 * Record the first single quote or dollar sign found in the query text at
 * or after the given offset as the location of a constant.
 *
 * Nothing is recorded when the location is negative (unknown) or when
 * neither character occurs before the end of the query text.
 */
static void record_defelem_arg_location(pgssConstLocations *jstate, int location)
{
	/*
	 * An unknown location gives no starting point for the scan; without this
	 * check the loop below would begin by reading query[-1].
	 */
	if (location < 0)
		return;

	for (int i = location; i < jstate->query_len; i++) {
		if (jstate->query[i] == '\'' || jstate->query[i] == '$') {
			RecordConstLocation(jstate, i);
			break;
		}
	}
}
/*
 * Find the first occurrence of str in the query text and record the byte
 * just before it as the location of a constant.
 *
 * Does nothing when str is NULL or does not occur in the query text.
 */
static void record_matching_string(pgssConstLocations *jstate, const char *str)
{
	const char *match;

	if (str == NULL)
		return;

	match = strstr(jstate->query, str);
	if (match == NULL)
		return;

	/*
	 * Step back one byte from the match -- presumably onto the opening quote
	 * of the literal; confirm against callers.
	 */
	RecordConstLocation(jstate, match - jstate->query - 1);
}
/*
 * Walk a raw parse tree and record the location of every constant that is to
 * be replaced by a parameter symbol.
 *
 * A_Const nodes are recorded directly, ParamRef nodes update the highest
 * external parameter number, and a handful of statement types get special
 * treatment. Everything else is walked generically.
 *
 * When jstate->normalize_utility_only is set, the walker does not descend
 * into SELECT, INSERT, UPDATE, DELETE or MERGE statements, nor into the
 * SET, COPY, DECLARE CURSOR, CREATE FUNCTION and DO cases below.
 *
 * Returns true only if a nested walk returned true; otherwise false.
 */
static bool const_record_walker(Node *node, pgssConstLocations *jstate)
{
	bool result;
	MemoryContext normalize_context = CurrentMemoryContext;

	if (node == NULL) return false;

	switch (nodeTag(node))
	{
		case T_A_Const:
			RecordConstLocation(jstate, castNode(A_Const, node)->location);
			break;
		case T_ParamRef:
			{
				/* Track the highest $n that already exists in the query */
				if (((ParamRef *) node)->number > jstate->highest_extern_param_id)
					jstate->highest_extern_param_id = castNode(ParamRef, node)->number;

				/* Remember the parameter number if recording is active */
				if (jstate->param_refs != NULL) {
					jstate->param_refs[jstate->param_refs_count] = ((ParamRef *) node)->number;
					jstate->param_refs_count++;
					if (jstate->param_refs_count >= jstate->param_refs_buf_size) {
						jstate->param_refs_buf_size *= 2;
						jstate->param_refs = (int *) repalloc(jstate->param_refs, jstate->param_refs_buf_size * sizeof(int));
					}
				}
			}
			break;
		case T_DefElem:
			{
				DefElem * defElem = (DefElem *) node;

				/*
				 * A String argument (or a one-element list holding a String)
				 * is located by scanning the query text from the DefElem's
				 * own location.
				 */
				if (defElem->arg == NULL) {
					/* no argument, nothing to record here */
				} else if (IsA(defElem->arg, String)) {
					record_defelem_arg_location(jstate, defElem->location);
				} else if (IsA(defElem->arg, List) && list_length((List *) defElem->arg) == 1 && IsA(linitial((List *) defElem->arg), String)) {
					record_defelem_arg_location(jstate, defElem->location);
				}
				return const_record_walker((Node *) ((DefElem *) node)->arg, jstate);
			}
		case T_RawStmt:
			return const_record_walker((Node *) ((RawStmt *) node)->stmt, jstate);
		case T_VariableSetStmt:
			if (jstate->normalize_utility_only) return false;
			return const_record_walker((Node *) ((VariableSetStmt *) node)->args, jstate);
		case T_CopyStmt:
			if (jstate->normalize_utility_only) return false;
			return const_record_walker((Node *) ((CopyStmt *) node)->query, jstate);
		case T_ExplainStmt:
			return const_record_walker((Node *) ((ExplainStmt *) node)->query, jstate);
		case T_CreateRoleStmt:
			return const_record_walker((Node *) ((CreateRoleStmt *) node)->options, jstate);
		case T_AlterRoleStmt:
			return const_record_walker((Node *) ((AlterRoleStmt *) node)->options, jstate);
		case T_DeclareCursorStmt:
			if (jstate->normalize_utility_only) return false;
			return const_record_walker((Node *) ((DeclareCursorStmt *) node)->query, jstate);
		case T_CreateFunctionStmt:
			if (jstate->normalize_utility_only) return false;
			return const_record_walker((Node *) ((CreateFunctionStmt *) node)->options, jstate);
		case T_DoStmt:
			if (jstate->normalize_utility_only) return false;
			return const_record_walker((Node *) ((DoStmt *) node)->args, jstate);
		case T_CreateSubscriptionStmt:
			record_matching_string(jstate, ((CreateSubscriptionStmt *) node)->conninfo);
			break;
		case T_AlterSubscriptionStmt:
			record_matching_string(jstate, ((AlterSubscriptionStmt *) node)->conninfo);
			break;
		case T_CreateUserMappingStmt:
			return const_record_walker((Node *) ((CreateUserMappingStmt *) node)->options, jstate);
		case T_AlterUserMappingStmt:
			return const_record_walker((Node *) ((AlterUserMappingStmt *) node)->options, jstate);
		case T_TypeName:
			/* Do not descend into type names */
			return false;
		case T_SelectStmt:
			{
				if (jstate->normalize_utility_only) return false;
				SelectStmt *stmt = (SelectStmt *) node;
				ListCell *lc;
				List *fp_and_param_refs_list = NIL;

				if (const_record_walker((Node *) stmt->distinctClause, jstate))
					return true;
				if (const_record_walker((Node *) stmt->intoClause, jstate))
					return true;
				foreach(lc, stmt->targetList)
				{
					ResTarget *res_target = lfirst_node(ResTarget, lc);
					FpAndParamRefs *fp_and_param_refs = palloc0(sizeof(FpAndParamRefs));

					/* Start recording the param ids seen in this target */
					jstate->param_refs = palloc0(1 * sizeof(int));
					jstate->param_refs_buf_size = 1;
					jstate->param_refs_count = 0;

					if (const_record_walker((Node *) res_target, jstate))
						return true;

					/* Keep fingerprint and param ids for the GROUP BY pass */
					fp_and_param_refs->fp = pg_query_fingerprint_node(res_target->val);
					fp_and_param_refs->param_refs = jstate->param_refs;
					fp_and_param_refs->param_refs_count = jstate->param_refs_count;
					fp_and_param_refs_list = lappend(fp_and_param_refs_list, fp_and_param_refs);

					/* Stop recording until the next target starts */
					jstate->param_refs = NULL;
					jstate->param_refs_buf_size = 0;
					jstate->param_refs_count = 0;
				}
				if (const_record_walker((Node *) stmt->fromClause, jstate))
					return true;
				if (const_record_walker((Node *) stmt->whereClause, jstate))
					return true;
				foreach(lc, stmt->groupClause)
				{
					/* A bare integer constant (e.g. GROUP BY 1) is left as is */
					if (IsA(lfirst(lc), A_Const) && IsA(&castNode(A_Const, lfirst(lc))->val, Integer))
						continue;

					/* Look for a target list entry with the same fingerprint */
					uint64_t fp = pg_query_fingerprint_node(lfirst(lc));
					FpAndParamRefs *fppr = NULL;
					ListCell *lc2;
					foreach(lc2, fp_and_param_refs_list) {
						if (fp == ((FpAndParamRefs *) lfirst(lc2))->fp) {
							fppr = (FpAndParamRefs *) lfirst(lc2);
							foreach_delete_current(fp_and_param_refs_list, lc2);
							break;
						}
					}

					int prev_cloc_count = jstate->clocations_count;
					if (const_record_walker((Node *) lfirst(lc), jstate))
						return true;

					/*
					 * If the group expression matched a target and produced
					 * the same number of constants, reuse the target's param
					 * ids and give back the ordinals handed out just now.
					 */
					if (fppr != NULL && fppr->param_refs_count == jstate->clocations_count - prev_cloc_count) {
						for (int i = prev_cloc_count; i < jstate->clocations_count; i++) {
							jstate->clocations[i].param_id = fppr->param_refs[i - prev_cloc_count];
						}
						jstate->highest_normalize_param_id -= fppr->param_refs_count;
					}
				}
				foreach(lc, stmt->sortClause)
				{
					/* A bare integer constant (e.g. ORDER BY 1) is left as is */
					if (IsA(lfirst(lc), SortBy) && IsA(castNode(SortBy, lfirst(lc))->node, A_Const) &&
						IsA(&castNode(A_Const, castNode(SortBy, lfirst(lc))->node)->val, Integer))
						continue;

					if (const_record_walker((Node *) lfirst(lc), jstate))
						return true;
				}
				if (const_record_walker((Node *) stmt->havingClause, jstate))
					return true;
				if (const_record_walker((Node *) stmt->windowClause, jstate))
					return true;
				if (const_record_walker((Node *) stmt->valuesLists, jstate))
					return true;
				if (const_record_walker((Node *) stmt->limitOffset, jstate))
					return true;
				if (const_record_walker((Node *) stmt->limitCount, jstate))
					return true;
				if (const_record_walker((Node *) stmt->lockingClause, jstate))
					return true;
				if (const_record_walker((Node *) stmt->withClause, jstate))
					return true;
				if (const_record_walker((Node *) stmt->larg, jstate))
					return true;
				if (const_record_walker((Node *) stmt->rarg, jstate))
					return true;

				return false;
			}
		case T_MergeStmt:
		case T_InsertStmt:
		case T_UpdateStmt:
		case T_DeleteStmt:
			if (jstate->normalize_utility_only) return false;
			return raw_expression_tree_walker(node, const_record_walker, (void*) jstate);
		default:
			{
				/*
				 * The generic walker may raise an error for a node it does
				 * not handle; such errors are swallowed and the node is
				 * treated as containing no constants.
				 *
				 * The value is carried out through "result" rather than
				 * returned from inside PG_TRY: leaving the block early would
				 * skip PG_END_TRY and leave the exception stack pointing into
				 * this function's stack frame after it has returned.
				 */
				PG_TRY();
				{
					result = raw_expression_tree_walker(node, const_record_walker, (void*) jstate);
				}
				PG_CATCH();
				{
					MemoryContextSwitchTo(normalize_context);
					FlushErrorState();
					result = false;
				}
				PG_END_TRY();

				return result;
			}
	}

	return false;
}
PgQueryNormalizeResult pg_query_normalize_ext(const char* input, bool normalize_utility_only)
{
MemoryContext ctx = NULL;
PgQueryNormalizeResult result = {0};
ctx = pg_query_enter_memory_context();
PG_TRY();
{
List *tree;
pgssConstLocations jstate;
int query_len;
tree = raw_parser(input, RAW_PARSE_DEFAULT);
query_len = (int) strlen(input);
jstate.clocations_buf_size = 32;
jstate.clocations = (pgssLocationLen *)
palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen));
jstate.clocations_count = 0;
jstate.highest_normalize_param_id = 1;
jstate.highest_extern_param_id = 0;
jstate.query = input;
jstate.query_len = query_len;
jstate.param_refs = NULL;
jstate.param_refs_buf_size = 0;
jstate.param_refs_count = 0;
jstate.normalize_utility_only = normalize_utility_only;
const_record_walker((Node *) tree, &jstate);
result.normalized_query = strdup(generate_normalized_query(&jstate, 0, &query_len, PG_UTF8));
}
PG_CATCH();
{
ErrorData* error_data;
PgQueryError* error;
MemoryContextSwitchTo(ctx);
error_data = CopyErrorData();
error = malloc(sizeof(PgQueryError));
error->message = strdup(error_data->message);
error->filename = strdup(error_data->filename);
error->funcname = strdup(error_data->funcname);
error->context = NULL;
error->lineno = error_data->lineno;
error->cursorpos = error_data->cursorpos;
result.error = error;
FlushErrorState();
}
PG_END_TRY();
pg_query_exit_memory_context(ctx);
return result;
}
/*
 * Normalize input with normalize_utility_only disabled.
 *
 * Equivalent to pg_query_normalize_ext(input, false).
 */
PgQueryNormalizeResult pg_query_normalize(const char* input)
{
	const bool utility_only = false;

	return pg_query_normalize_ext(input, utility_only);
}
/*
 * Normalize input with normalize_utility_only enabled.
 *
 * Equivalent to pg_query_normalize_ext(input, true).
 */
PgQueryNormalizeResult pg_query_normalize_utility(const char* input)
{
	const bool utility_only = true;

	return pg_query_normalize_ext(input, utility_only);
}
/*
 * Release the memory owned by a normalize result: the normalized query
 * string and, if present, the error record with its message, filename and
 * funcname strings.
 */
void pg_query_free_normalize_result(PgQueryNormalizeResult result)
{
	PgQueryError *err = result.error;

	if (err != NULL)
	{
		free(err->message);
		free(err->filename);
		free(err->funcname);
		free(err);
	}

	free(result.normalized_query);
}