#ifndef RAY_OPS_H
#define RAY_OPS_H
#include <rayforce.h>
#include "store/hnsw.h"
#ifdef __cplusplus
extern "C" {
#endif
#define RAY_SEL 14
#define RAY_LAZY 104
typedef struct ray_pool ray_pool_t;
typedef struct ray_csr ray_csr_t;
typedef struct ray_rel ray_rel_t;
typedef struct ray_hnsw ray_hnsw_t;
typedef struct ray_graph ray_graph_t;
typedef struct ray_op ray_op_t;
static inline bool ray_is_lazy(ray_t* x) {
return x && !RAY_IS_ERR(x) && x->type == RAY_LAZY;
}
ray_t* ray_lazy_materialize(ray_t* val);
void ray_cancel(void);
#define RAY_PARTED_BASE 32
#define RAY_MAPCOMMON 64
#define RAY_MC_SYM 0
#define RAY_MC_DATE 1
#define RAY_MC_I64 2
#define RAY_IS_PARTED(t) ((t) >= RAY_PARTED_BASE && (t) < RAY_MAPCOMMON)
#define RAY_PARTED_BASETYPE(t) ((t) - RAY_PARTED_BASE)
#define RAY_MORSEL_ELEMS 1024
#define RAY_SLAB_CACHE_SIZE 64
#define RAY_SLAB_ORDERS 5
#define RAY_ORDER_MIN 6
#define RAY_ORDER_MAX 30
#define RAY_PARALLEL_THRESHOLD (64 * RAY_MORSEL_ELEMS)
#define RAY_DISPATCH_MORSELS 8
#define RAY_JOIN_L2_TARGET (256 * 1024)
#define RAY_JOIN_MIN_RADIX 2
#define RAY_JOIN_MAX_RADIX 14
#define OP_SCAN 1
#define OP_CONST 2
#define OP_NEG 10
#define OP_ABS 11
#define OP_NOT 12
#define OP_SQRT 13
#define OP_LOG 14
#define OP_EXP 15
#define OP_CEIL 16
#define OP_FLOOR 17
#define OP_ISNULL 18
#define OP_CAST 19
#define OP_ROUND 9
#define OP_ADD 20
#define OP_SUB 21
#define OP_MUL 22
#define OP_DIV 23
#define OP_MOD 24
#define OP_EQ 25
#define OP_NE 26
#define OP_LT 27
#define OP_LE 28
#define OP_GT 29
#define OP_GE 30
#define OP_AND 31
#define OP_OR 32
#define OP_MIN2 33
#define OP_MAX2 34
#define OP_IF 35
#define OP_LIKE 36
#define OP_UPPER 37
#define OP_LOWER 38
#define OP_STRLEN 39
#define OP_SUBSTR 40
#define OP_REPLACE 41
#define OP_TRIM 42
#define OP_CONCAT 43
#define OP_EXTRACT 45
#define OP_DATE_TRUNC 46
#define OP_IN 47
#define OP_NOT_IN 48
#define RAY_EXTRACT_YEAR 0
#define RAY_EXTRACT_MONTH 1
#define RAY_EXTRACT_DAY 2
#define RAY_EXTRACT_HOUR 3
#define RAY_EXTRACT_MINUTE 4
#define RAY_EXTRACT_SECOND 5
#define RAY_EXTRACT_DOW 6
#define RAY_EXTRACT_DOY 7
#define RAY_EXTRACT_EPOCH 8
#define OP_SUM 50
#define OP_PROD 51
#define OP_MIN 52
#define OP_MAX 53
#define OP_COUNT 54
#define OP_AVG 55
#define OP_FIRST 56
#define OP_LAST 57
#define OP_COUNT_DISTINCT 58
#define OP_STDDEV 59
#define OP_FILTER 60
#define OP_SORT 61
#define OP_GROUP 62
#define OP_JOIN 63
#define OP_WINDOW_JOIN 64
#define OP_SELECT 66
#define OP_HEAD 67
#define OP_TAIL 68
#define OP_WINDOW 72
#define OP_STDDEV_POP 73
#define OP_VAR 74
#define OP_VAR_POP 75
#define OP_ILIKE 76
#define OP_PIVOT 77
#define OP_ANTIJOIN 78
#define OP_EXPAND 80
#define OP_VAR_EXPAND 81
#define OP_SHORTEST_PATH 82
#define OP_WCO_JOIN 83
#define OP_PAGERANK 84
#define OP_CONNECTED_COMP 85
#define OP_DIJKSTRA 86
#define OP_LOUVAIN 87
#define OP_DEGREE_CENT 92
#define OP_TOPSORT 93
#define OP_DFS 94
#define OP_ASTAR 95
#define OP_K_SHORTEST 96
#define OP_CLUSTER_COEFF 97
#define OP_RANDOM_WALK 98
#define OP_BETWEENNESS 99
#define OP_CLOSENESS 100
#define OP_MST 101
#define OP_ANN_RERANK 102
#define OP_KNN_RERANK 103
#define OP_ALIAS 70
#define OP_MATERIALIZE 71
#define RAY_WIN_ROW_NUMBER 0
#define RAY_WIN_RANK 1
#define RAY_WIN_DENSE_RANK 2
#define RAY_WIN_NTILE 3
#define RAY_WIN_SUM 4
#define RAY_WIN_AVG 5
#define RAY_WIN_MIN 6
#define RAY_WIN_MAX 7
#define RAY_WIN_COUNT 8
#define RAY_WIN_LAG 9
#define RAY_WIN_LEAD 10
#define RAY_WIN_FIRST_VALUE 11
#define RAY_WIN_LAST_VALUE 12
#define RAY_WIN_NTH_VALUE 13
#define RAY_FRAME_ROWS 0
#define RAY_FRAME_RANGE 1
#define RAY_BOUND_UNBOUNDED_PRECEDING 0
#define RAY_BOUND_N_PRECEDING 1
#define RAY_BOUND_CURRENT_ROW 2
#define RAY_BOUND_N_FOLLOWING 3
#define RAY_BOUND_UNBOUNDED_FOLLOWING 4
#define OP_FLAG_FUSED 0x01
#define OP_FLAG_DEAD 0x02
typedef struct ray_op {
uint16_t opcode;
uint8_t arity;
uint8_t flags;
int8_t out_type;
uint8_t pad[3];
uint32_t id;
uint32_t est_rows;
struct ray_op* inputs[2];
} ray_op_t;
typedef struct ray_op_ext {
ray_op_t base;
union {
ray_t* literal;
int64_t sym;
struct {
ray_op_t** keys;
uint8_t n_keys;
uint8_t n_aggs;
uint16_t* agg_ops;
ray_op_t** agg_ins;
};
struct {
ray_op_t** columns;
uint8_t* desc;
uint8_t* nulls_first;
uint8_t n_cols;
} sort;
struct {
ray_op_t** left_keys;
ray_op_t** right_keys;
uint8_t n_join_keys;
uint8_t join_type;
} join;
struct {
ray_op_t* time_key;
ray_op_t** eq_keys;
uint8_t n_eq_keys;
uint8_t join_type;
} asof;
struct {
ray_op_t** part_keys;
ray_op_t** order_keys;
uint8_t* order_descs;
ray_op_t** func_inputs;
uint8_t* func_kinds;
int64_t* func_params;
uint8_t n_part_keys;
uint8_t n_order_keys;
uint8_t n_funcs;
uint8_t frame_type;
uint8_t frame_start;
uint8_t frame_end;
int64_t frame_start_n;
int64_t frame_end_n;
} window;
struct {
void* rel;
void* sip_sel;
uint8_t direction;
uint8_t min_depth;
uint8_t max_depth;
uint8_t path_tracking;
uint8_t factorized;
uint16_t max_iter;
double damping;
int64_t weight_col_sym;
int64_t coord_col_syms[2];
void* node_props;
} graph;
struct {
void** rels;
uint8_t n_rels;
uint8_t n_vars;
} wco;
struct {
void* hnsw_idx;
int64_t col_sym;
float* query_vec;
int32_t dim;
int32_t metric;
int64_t k;
int32_t ef_search;
} rerank;
struct {
ray_op_t** index_cols;
ray_op_t* pivot_col;
ray_op_t* value_col;
uint16_t agg_op;
uint8_t n_index;
} pivot;
};
uint64_t* seg_mask;
int64_t seg_mask_count;
} ray_op_ext_t;
typedef struct ray_graph {
ray_op_t* nodes;
uint32_t node_count;
uint32_t node_cap;
ray_t* table;
ray_t** tables;
uint16_t n_tables;
ray_op_ext_t** ext_nodes;
uint32_t ext_count;
uint32_t ext_cap;
ray_t* selection;
struct {
int64_t sym;
uint32_t node_id;
} cexpr_env[32];
int cexpr_env_top;
} ray_graph_t;
typedef struct {
ray_t* vec;
int64_t offset;
int64_t len;
uint32_t elem_size;
int64_t morsel_len;
void* morsel_ptr;
uint8_t* null_bits;
} ray_morsel_t;
#define RAY_SEL_NONE 0
#define RAY_SEL_ALL 1
#define RAY_SEL_MIX 2
#define RAY_SEL_WORDS_PER_SEG (RAY_MORSEL_ELEMS / 64)
typedef struct {
int64_t total_pass;
uint32_t n_segs;
uint32_t _pad;
} ray_sel_meta_t;
static inline ray_sel_meta_t* ray_sel_meta(ray_t* s) {
return (ray_sel_meta_t*)ray_data(s);
}
static inline uint8_t* ray_sel_flags(ray_t* s) {
return (uint8_t*)ray_data(s) + sizeof(ray_sel_meta_t);
}
static inline uint16_t* ray_sel_popcnt(ray_t* s) {
uint32_t n = ray_sel_meta(s)->n_segs;
return (uint16_t*)(ray_sel_flags(s) + ((n + 7u) & ~7u));
}
static inline uint64_t* ray_sel_bits(ray_t* s) {
uint32_t n = ray_sel_meta(s)->n_segs;
uint16_t* pc = ray_sel_popcnt(s);
return (uint64_t*)(pc + ((n + 3u) & ~3u));
}
#define RAY_SEL_BIT_TEST(bits, r) ((bits)[(r) >> 6] & (1ULL << ((r) & 63)))
#define RAY_SEL_BIT_SET(bits, r) ((bits)[(r) >> 6] |= (1ULL << ((r) & 63)))
#define RAY_SEL_BIT_CLR(bits, r) ((bits)[(r) >> 6] &= ~(1ULL << ((r) & 63)))
typedef struct ray_pipe {
ray_op_t* op;
struct ray_pipe* inputs[2];
ray_morsel_t state;
ray_t* materialized;
int spill_fd;
} ray_pipe_t;
ray_t* ray_sel_new(int64_t nrows);
ray_t* ray_sel_from_pred(ray_t* bool_vec);
ray_t* ray_sel_and(ray_t* a, ray_t* b);
void ray_sel_recompute(ray_t* sel);
void ray_morsel_init(ray_morsel_t* m, ray_t* vec);
void ray_morsel_init_range(ray_morsel_t* m, ray_t* vec, int64_t start, int64_t end);
bool ray_morsel_next(ray_morsel_t* m);
ray_graph_t* ray_graph_new(ray_t* tbl);
void ray_graph_free(ray_graph_t* g);
ray_op_t* ray_scan(ray_graph_t* g, const char* col_name);
ray_op_t* ray_const_f64(ray_graph_t* g, double val);
ray_op_t* ray_const_i64(ray_graph_t* g, int64_t val);
ray_op_t* ray_const_bool(ray_graph_t* g, bool val);
ray_op_t* ray_const_str(ray_graph_t* g, const char* s, size_t len);
ray_op_t* ray_const_vec(ray_graph_t* g, ray_t* vec);
ray_op_t* ray_const_atom(ray_graph_t* g, ray_t* atom);
ray_op_t* ray_const_table(ray_graph_t* g, ray_t* table);
ray_op_t* ray_neg(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_abs(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_not(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_sqrt_op(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_log_op(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_exp_op(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_ceil_op(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_floor_op(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_round_op(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_isnull(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_cast(ray_graph_t* g, ray_op_t* a, int8_t target_type);
ray_op_t* ray_binop(ray_graph_t* g, uint16_t opcode, ray_op_t* a, ray_op_t* b);
ray_op_t* ray_add(ray_graph_t* g, ray_op_t* a, ray_op_t* b);
ray_op_t* ray_sub(ray_graph_t* g, ray_op_t* a, ray_op_t* b);
ray_op_t* ray_mul(ray_graph_t* g, ray_op_t* a, ray_op_t* b);
ray_op_t* ray_div(ray_graph_t* g, ray_op_t* a, ray_op_t* b);
ray_op_t* ray_mod(ray_graph_t* g, ray_op_t* a, ray_op_t* b);
ray_op_t* ray_eq(ray_graph_t* g, ray_op_t* a, ray_op_t* b);
ray_op_t* ray_ne(ray_graph_t* g, ray_op_t* a, ray_op_t* b);
ray_op_t* ray_lt(ray_graph_t* g, ray_op_t* a, ray_op_t* b);
ray_op_t* ray_le(ray_graph_t* g, ray_op_t* a, ray_op_t* b);
ray_op_t* ray_gt(ray_graph_t* g, ray_op_t* a, ray_op_t* b);
ray_op_t* ray_ge(ray_graph_t* g, ray_op_t* a, ray_op_t* b);
ray_op_t* ray_and(ray_graph_t* g, ray_op_t* a, ray_op_t* b);
ray_op_t* ray_or(ray_graph_t* g, ray_op_t* a, ray_op_t* b);
ray_op_t* ray_min2(ray_graph_t* g, ray_op_t* a, ray_op_t* b);
ray_op_t* ray_max2(ray_graph_t* g, ray_op_t* a, ray_op_t* b);
ray_op_t* ray_in(ray_graph_t* g, ray_op_t* col, ray_op_t* set);
ray_op_t* ray_not_in(ray_graph_t* g, ray_op_t* col, ray_op_t* set);
ray_op_t* ray_if(ray_graph_t* g, ray_op_t* cond, ray_op_t* then_val, ray_op_t* else_val);
ray_op_t* ray_like(ray_graph_t* g, ray_op_t* input, ray_op_t* pattern);
ray_op_t* ray_ilike(ray_graph_t* g, ray_op_t* input, ray_op_t* pattern);
ray_op_t* ray_upper(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_lower(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_strlen(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_substr(ray_graph_t* g, ray_op_t* str, ray_op_t* start, ray_op_t* len);
ray_op_t* ray_replace(ray_graph_t* g, ray_op_t* str, ray_op_t* from, ray_op_t* to);
ray_op_t* ray_trim_op(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_concat(ray_graph_t* g, ray_op_t** args, int n);
ray_op_t* ray_extract(ray_graph_t* g, ray_op_t* col, int64_t field);
ray_op_t* ray_date_trunc(ray_graph_t* g, ray_op_t* col, int64_t field);
ray_op_t* ray_sum(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_prod(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_min_op(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_max_op(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_count(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_avg(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_first(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_last(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_count_distinct(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_stddev(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_stddev_pop(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_var(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_var_pop(ray_graph_t* g, ray_op_t* a);
ray_op_t* ray_filter(ray_graph_t* g, ray_op_t* input, ray_op_t* predicate);
ray_op_t* ray_sort_op(ray_graph_t* g, ray_op_t* table_node,
ray_op_t** keys, uint8_t* descs, uint8_t* nulls_first,
uint8_t n_cols);
ray_op_t* ray_group(ray_graph_t* g, ray_op_t** keys, uint8_t n_keys,
uint16_t* agg_ops, ray_op_t** agg_ins, uint8_t n_aggs);
ray_op_t* ray_distinct(ray_graph_t* g, ray_op_t** keys, uint8_t n_keys);
ray_op_t* ray_pivot_op(ray_graph_t* g,
ray_op_t** index_cols, uint8_t n_index,
ray_op_t* pivot_col,
ray_op_t* value_col,
uint16_t agg_op);
ray_op_t* ray_join(ray_graph_t* g,
ray_op_t* left_table, ray_op_t** left_keys,
ray_op_t* right_table, ray_op_t** right_keys,
uint8_t n_keys, uint8_t join_type);
ray_op_t* ray_antijoin(ray_graph_t* g,
ray_op_t* left_table, ray_op_t** left_keys,
ray_op_t* right_table, ray_op_t** right_keys,
uint8_t n_keys);
ray_op_t* ray_asof_join(ray_graph_t* g,
ray_op_t* left_table, ray_op_t* right_table,
ray_op_t* time_key,
ray_op_t** eq_keys, uint8_t n_eq_keys,
uint8_t join_type);
ray_op_t* ray_window_op(ray_graph_t* g, ray_op_t* table_node,
ray_op_t** part_keys, uint8_t n_part,
ray_op_t** order_keys, uint8_t* order_descs, uint8_t n_order,
uint8_t* func_kinds, ray_op_t** func_inputs,
int64_t* func_params, uint8_t n_funcs,
uint8_t frame_type, uint8_t frame_start, uint8_t frame_end,
int64_t frame_start_n, int64_t frame_end_n);
ray_op_t* ray_select(ray_graph_t* g, ray_op_t* input,
ray_op_t** cols, uint8_t n_cols);
ray_op_t* ray_head(ray_graph_t* g, ray_op_t* input, int64_t n);
ray_op_t* ray_tail(ray_graph_t* g, ray_op_t* input, int64_t n);
ray_op_t* ray_alias(ray_graph_t* g, ray_op_t* input, const char* name);
ray_op_t* ray_materialize(ray_graph_t* g, ray_op_t* input);
uint16_t ray_graph_add_table(ray_graph_t* g, ray_t* table);
ray_op_t* ray_scan_table(ray_graph_t* g, uint16_t table_id, const char* col_name);
ray_op_t* ray_expand(ray_graph_t* g, ray_op_t* src_nodes,
ray_rel_t* rel, uint8_t direction);
ray_op_t* ray_var_expand(ray_graph_t* g, ray_op_t* start_nodes,
ray_rel_t* rel, uint8_t direction,
uint8_t min_depth, uint8_t max_depth,
bool track_path);
ray_op_t* ray_shortest_path(ray_graph_t* g, ray_op_t* src, ray_op_t* dst,
ray_rel_t* rel, uint8_t max_depth);
ray_op_t* ray_wco_join(ray_graph_t* g,
ray_rel_t** rels, uint8_t n_rels,
uint8_t n_vars);
ray_op_t* ray_pagerank(ray_graph_t* g, ray_rel_t* rel,
uint16_t max_iter, double damping);
ray_op_t* ray_connected_comp(ray_graph_t* g, ray_rel_t* rel);
ray_op_t* ray_dijkstra(ray_graph_t* g, ray_op_t* src, ray_op_t* dst,
ray_rel_t* rel, const char* weight_col,
uint8_t max_depth);
ray_op_t* ray_louvain(ray_graph_t* g, ray_rel_t* rel,
uint16_t max_iter);
ray_op_t* ray_degree_cent(ray_graph_t* g, ray_rel_t* rel);
ray_op_t* ray_topsort(ray_graph_t* g, ray_rel_t* rel);
ray_op_t* ray_dfs(ray_graph_t* g, ray_op_t* src, ray_rel_t* rel, uint8_t max_depth);
ray_op_t* ray_astar(ray_graph_t* g, ray_op_t* src, ray_op_t* dst,
ray_rel_t* rel, const char* weight_col,
const char* lat_col, const char* lon_col,
ray_t* node_props, uint8_t max_depth);
ray_op_t* ray_k_shortest(ray_graph_t* g, ray_op_t* src, ray_op_t* dst,
ray_rel_t* rel, const char* weight_col, uint16_t k);
ray_op_t* ray_cluster_coeff(ray_graph_t* g, ray_rel_t* rel);
ray_op_t* ray_random_walk(ray_graph_t* g, ray_op_t* src, ray_rel_t* rel,
uint16_t walk_length);
ray_op_t* ray_betweenness(ray_graph_t* g, ray_rel_t* rel, uint16_t sample_size);
ray_op_t* ray_closeness(ray_graph_t* g, ray_rel_t* rel, uint16_t sample_size);
ray_op_t* ray_mst(ray_graph_t* g, ray_rel_t* rel, const char* weight_col);
ray_op_t* ray_ann_rerank(ray_graph_t* g, ray_op_t* src,
ray_hnsw_t* idx, const float* query_vec,
int32_t dim, int64_t k, int32_t ef_search);
ray_op_t* ray_knn_rerank(ray_graph_t* g, ray_op_t* src,
int64_t col_sym, const float* query_vec,
int32_t dim, int64_t k, ray_hnsw_metric_t metric);
ray_rel_t* ray_rel_build(ray_t* from_table, const char* fk_col,
int64_t n_target_nodes, bool sort_targets);
ray_rel_t* ray_rel_from_edges(ray_t* edge_table,
const char* src_col, const char* dst_col,
int64_t n_src_nodes, int64_t n_dst_nodes,
bool sort_targets);
ray_err_t ray_rel_save(ray_rel_t* rel, const char* dir);
ray_rel_t* ray_rel_load(const char* dir);
ray_rel_t* ray_rel_mmap(const char* dir);
void ray_rel_set_props(ray_rel_t* rel, ray_t* props);
void ray_rel_free(ray_rel_t* rel);
const int64_t* ray_rel_neighbors(ray_rel_t* rel, int64_t node,
uint8_t direction, int64_t* out_count);
int64_t ray_rel_n_nodes(ray_rel_t* rel, uint8_t direction);
ray_op_t* ray_optimize(ray_graph_t* g, ray_op_t* root);
void ray_fuse_pass(ray_graph_t* g, ray_op_t* root);
const char* ray_opcode_name(uint16_t op);
void ray_graph_dump(ray_graph_t* g, ray_op_t* root, void* out);
ray_t* ray_sort_indices(ray_t** cols, uint8_t* descs, uint8_t* nulls_first,
uint8_t n_cols, int64_t nrows);
ray_t* ray_execute(ray_graph_t* g, ray_op_t* root);
#define RAY_LAZY_GRAPH(p) (*(ray_graph_t**)((p)->nullmap))
#define RAY_LAZY_OP(p) (*(ray_op_t**)(((p)->nullmap) + 8))
ray_op_t* ray_graph_input_vec(ray_graph_t* g, ray_t* vec);
ray_t* ray_lazy_wrap(ray_graph_t* g, ray_op_t* op);
ray_t* ray_lazy_append(ray_t* lazy, uint16_t opcode);
#ifdef __cplusplus
}
#endif
#endif