#include "fuse.h"
#include "mem/sys.h"
#include <string.h>
/*
 * Tell whether `opcode` falls in one of the two opcode ranges this pass treats
 * as element-wise: [OP_NEG, OP_CAST] or [OP_ADD, OP_MAX2] (both inclusive).
 */
static bool is_elementwise(uint16_t opcode) {
    if (opcode >= OP_NEG && opcode <= OP_CAST) {
        return true;
    }
    return opcode >= OP_ADD && opcode <= OP_MAX2;
}
/*
 * Look up the extension record attached to node `node_id`.
 *
 * Scans g->ext_nodes[0 .. g->ext_count) in order, skipping NULL slots, and
 * returns the first entry whose base.id equals `node_id`, or NULL if there is
 * no such entry.
 */
static ray_op_ext_t* find_ext(ray_graph_t* g, uint32_t node_id) {
    for (uint32_t slot = 0; slot < g->ext_count; slot++) {
        ray_op_ext_t* candidate = g->ext_nodes[slot];
        if (!candidate) {
            continue;
        }
        if (candidate->base.id == node_id) {
            return candidate;
        }
    }
    return NULL;
}
/* Traversal state shared by count_refs() and ref_walk_id(). */
typedef struct {
    uint32_t* ref_counts; /* per-node reference counters, indexed by node id */
    uint32_t* stack;      /* ids of nodes whose inputs have not been walked yet */
    uint32_t  sp;         /* number of ids currently held in `stack` */
    uint32_t  nc;         /* node count; ids >= nc are ignored */
} ref_walk_t;

/*
 * Record one reference to node `id`.
 *
 * The first reference (counter going 0 -> 1) also queues the node so that its
 * own inputs get walked later. Because a node is queued only on that
 * transition, every id is pushed at most once, so the stack never needs more
 * than `nc` slots and no reference can be dropped for lack of space.
 */
static void ref_walk_id(ref_walk_t* w, uint32_t id) {
    if (id >= w->nc) return; /* outside the node table: nothing to count */
    if (w->ref_counts[id]++ == 0)
        w->stack[w->sp++] = id;
}

/*
 * Count how often each node reachable from `root` is referenced.
 *
 * `root` itself receives one reference; every input edge of a reached node
 * (the first two entries of inputs[], plus the opcode-specific inputs stored
 * in the node's extension record) adds one reference to its target. Each
 * node's inputs are walked exactly once, however many references it has.
 *
 * g          - graph owning the nodes and extension records.
 * root       - traversal start; NULL makes the call a no-op.
 * ref_counts - array of at least g->node_count counters, incremented in
 *              place. The caller is expected to zero it beforehand
 *              (ray_fuse_pass does).
 *
 * If the work stack cannot be allocated the function returns without
 * touching ref_counts.
 */
static void count_refs(ray_graph_t* g, ray_op_t* root, uint32_t* ref_counts) {
    if (!root) return;
    uint32_t nc = g->node_count;

    /* At most nc ids are ever queued (see ref_walk_id), so nc slots suffice. */
    uint32_t stack_local[256];
    uint32_t* stack = stack_local;
    bool on_heap = nc > 256;
    if (on_heap) {
        if (nc > SIZE_MAX / sizeof(uint32_t)) return; /* byte size would wrap */
        stack = (uint32_t*)ray_sys_alloc((size_t)nc * sizeof(uint32_t));
        if (!stack) return;
    }

    ref_walk_t w = { .ref_counts = ref_counts, .stack = stack, .sp = 0, .nc = nc };
    ref_walk_id(&w, root->id);

    while (w.sp > 0) {
        uint32_t nid = w.stack[--w.sp];
        ray_op_t* n = &g->nodes[nid];

        /* Direct inputs: only the first two slots are followed here. */
        for (int i = 0; i < n->arity && i < 2; i++) {
            if (n->inputs[i])
                ref_walk_id(&w, n->inputs[i]->id);
        }

        /* Third operand: its node id is stored in the ext record's `literal`. */
        if (n->opcode == OP_IF || n->opcode == OP_SUBSTR || n->opcode == OP_REPLACE) {
            ray_op_ext_t* ext = find_ext(g, nid);
            if (ext)
                ref_walk_id(&w, (uint32_t)(uintptr_t)ext->literal);
        }

        /* CONCAT: ext->sym is the argument count; the ids of arguments 2 and
         * up are read from the uint32_t array that directly follows the ext
         * record in memory. */
        if (n->opcode == OP_CONCAT) {
            ray_op_ext_t* ext = find_ext(g, nid);
            if (ext && ext->sym >= 2) {
                int n_args = (int)ext->sym;
                uint32_t* trail = (uint32_t*)((char*)(ext + 1));
                for (int i = 2; i < n_args; i++)
                    ref_walk_id(&w, trail[i - 2]);
            }
        }

        /* Operators whose extra inputs are node pointers in the ext record. */
        if (n->opcode == OP_GROUP || n->opcode == OP_SORT ||
            n->opcode == OP_JOIN || n->opcode == OP_WINDOW_JOIN ||
            n->opcode == OP_WINDOW ||
            n->opcode == OP_SELECT) {
            ray_op_ext_t* ext = find_ext(g, nid);
            if (ext) {
                switch (n->opcode) {
                case OP_GROUP:
                    for (uint8_t k = 0; k < ext->n_keys; k++) {
                        if (ext->keys[k])
                            ref_walk_id(&w, ext->keys[k]->id);
                    }
                    for (uint8_t a = 0; a < ext->n_aggs; a++) {
                        if (ext->agg_ins[a])
                            ref_walk_id(&w, ext->agg_ins[a]->id);
                    }
                    break;
                case OP_SORT:
                case OP_SELECT: /* walked through the same ext->sort fields as OP_SORT */
                    for (uint8_t k = 0; k < ext->sort.n_cols; k++) {
                        if (ext->sort.columns[k])
                            ref_walk_id(&w, ext->sort.columns[k]->id);
                    }
                    break;
                case OP_JOIN:
                    for (uint8_t k = 0; k < ext->join.n_join_keys; k++) {
                        if (ext->join.left_keys[k])
                            ref_walk_id(&w, ext->join.left_keys[k]->id);
                        /* right_keys itself may be NULL, unlike left_keys */
                        if (ext->join.right_keys && ext->join.right_keys[k])
                            ref_walk_id(&w, ext->join.right_keys[k]->id);
                    }
                    break;
                case OP_WINDOW_JOIN:
                    if (ext->asof.time_key)
                        ref_walk_id(&w, ext->asof.time_key->id);
                    for (uint8_t k = 0; k < ext->asof.n_eq_keys; k++) {
                        if (ext->asof.eq_keys[k])
                            ref_walk_id(&w, ext->asof.eq_keys[k]->id);
                    }
                    break;
                case OP_WINDOW:
                    for (uint8_t k = 0; k < ext->window.n_part_keys; k++) {
                        if (ext->window.part_keys[k])
                            ref_walk_id(&w, ext->window.part_keys[k]->id);
                    }
                    for (uint8_t k = 0; k < ext->window.n_order_keys; k++) {
                        if (ext->window.order_keys[k])
                            ref_walk_id(&w, ext->window.order_keys[k]->id);
                    }
                    for (uint8_t f = 0; f < ext->window.n_funcs; f++) {
                        if (ext->window.func_inputs[f])
                            ref_walk_id(&w, ext->window.func_inputs[f]->id);
                    }
                    break;
                default:
                    break;
                }
            }
        }
    }

    if (on_heap) ray_sys_free(stack);
}
/*
 * Mark nodes that can be fused with one of their inputs.
 *
 * Reference counts are first gathered with count_refs() starting at `root`.
 * Then every node in g->nodes that is element-wise and not flagged
 * OP_FLAG_DEAD gets OP_FLAG_FUSED set when at least one of its first two
 * inputs is itself element-wise and has a reference count of exactly 1.
 *
 * Does nothing when `g` or `root` is NULL, when the graph has no nodes, or
 * when the temporary counter array cannot be allocated.
 */
void ray_fuse_pass(ray_graph_t* g, ray_op_t* root) {
    if (!g || !root) return;

    const uint32_t total = g->node_count;
    if (total == 0) return;

    /* Small graphs keep their counters on the stack; larger ones use the heap. */
    uint32_t inline_counts[256];
    uint32_t* counts = inline_counts;
    const bool on_heap = total > 256;
    if (on_heap) {
        counts = (uint32_t*)ray_sys_alloc(total * sizeof(uint32_t));
        if (!counts) return;
    }

    memset(counts, 0, total * sizeof(uint32_t));
    count_refs(g, root, counts);

    for (uint32_t idx = 0; idx < total; idx++) {
        ray_op_t* op = &g->nodes[idx];
        if (!is_elementwise(op->opcode) || (op->flags & OP_FLAG_DEAD)) {
            continue;
        }

        /* One qualifying input is enough, so stop at the first match. */
        for (int slot = 0; slot < op->arity && slot < 2; slot++) {
            ray_op_t* src = op->inputs[slot];
            if (!src) continue;
            if (!is_elementwise(src->opcode)) continue;
            if (counts[src->id] != 1) continue;
            op->flags |= OP_FLAG_FUSED;
            break;
        }
    }

    if (on_heap) ray_sys_free(counts);
}