1pub mod argmax;
6pub mod argsort;
7pub mod top_k;
8pub mod copy;
9pub mod cumsum;
10pub mod dense_gemm;
11pub mod dense_gemv_bf16;
12pub mod dense_mm_bf16;
13pub mod dense_mm_f16;
14pub mod dense_mm_f32_f32;
15pub mod dequant_to_f16;
16pub mod elementwise;
17pub mod embedding;
18pub mod gather;
19pub mod gather_bench;
20pub mod hadamard;
21pub mod hadamard_quantize_kv;
22pub mod encode_helpers;
23pub mod fused_head_norm_rope;
24pub mod fused_norm_add;
25pub mod fused_residual_norm;
26pub mod gelu;
27pub mod kv_cache_copy;
28pub mod l2_norm;
29pub mod log_elementwise;
30pub mod row_sum;
31pub mod moe_dispatch;
32pub mod moe_gate;
33pub mod moe_softmax_topk;
34pub mod mul_mv_ext;
35pub mod moe_weighted_reduce;
36pub mod qkv_split;
37pub mod repeat_tiled;
38pub mod quantized_matmul;
39pub mod quantized_matmul_ggml;
40pub mod quantized_matmul_id;
41pub mod quantized_matmul_id_ggml;
42pub mod rms_norm;
43pub mod rope;
44pub mod rope_multi;
45pub mod rope_train;
46pub mod vision_2d_rope;
47pub mod scale_mask_softmax;
48pub mod sigmoid_mul;
49pub mod silu_mul;
50pub mod compute_g_beta;
51pub mod ssm_norm_gate;
52pub mod flash_attn_prefill;
53pub mod flash_attn_prefill_blk;
54pub mod flash_attn_train;
55pub mod flash_attn_prefill_d512;
56pub mod flash_attn_prefill_mask;
57pub mod flash_attn_vec;
58pub mod flash_attn_vec_tq;
59pub mod flash_attn_vec_tq_hb;
60pub mod flash_attn_vec_hybrid;
61pub mod flash_attn_vec_peer_port_f16;
62pub mod flash_attn_vec_reduce_tq_hb_undo;
63pub mod fwht_standalone;
64pub mod chunk_gated_delta_rule;
65pub mod chunk_gated_delta_rule_tri_solve_invert;
66pub mod gated_delta_net;
67pub mod gated_delta_net_decode;
68pub mod gated_delta_net_chunk;
69pub mod gated_delta_net_chunk_o;
70pub mod gated_delta_net_kkt;
71pub mod gated_delta_net_recompute_wu;
72pub mod tq_dequantize_kv;
73pub mod sdpa;
74pub mod sdpa_decode;
75pub mod sdpa_sliding;
76pub mod softcap;
77pub mod softmax;
78pub mod softmax_backward;
79pub mod softmax_sample;
80pub mod ssm_conv;
81pub mod conv1d_depthwise_causal;
82pub mod exp_elementwise;
83pub mod outer_product;
84pub mod take_along_axis;
85pub mod divide_elementwise;
86pub mod sqrt_elementwise;
87pub mod transpose;
88pub mod tri_solve;
89pub mod qdq_legacy;
90pub mod rms_norm_backward;
91pub mod slice_concat_2d;
92pub mod silu_backward;
93pub mod embedding_autograd;
94pub mod adam_update;
95pub mod qdq_affine;
96pub mod qmm_affine;
97pub mod im2col_2d_3ch;
99pub mod add_bias_row_2d;
100pub mod bilinear_resize_2d;
101pub mod block_merge_2x2;
102pub mod feature_concat;