1pub mod argmax;
6pub mod argsort;
7pub mod top_k;
8pub mod copy;
9pub mod cumsum;
10pub mod dense_gemm;
11pub mod dense_gemv_bf16;
12pub mod dense_mm_bf16;
13pub mod dense_mm_f16;
14pub mod dense_mm_f32_f32;
15pub mod elementwise;
16pub mod embedding;
17pub mod gather;
18pub mod gather_bench;
19pub mod hadamard;
20pub mod hadamard_quantize_kv;
21pub mod encode_helpers;
22pub mod fused_head_norm_rope;
23pub mod fused_norm_add;
24pub mod fused_residual_norm;
25pub mod gelu;
26pub mod kv_cache_copy;
27pub mod l2_norm;
28pub mod log_elementwise;
29pub mod row_sum;
30pub mod moe_dispatch;
31pub mod moe_gate;
32pub mod moe_softmax_topk;
33pub mod mul_mv_ext;
34pub mod moe_weighted_reduce;
35pub mod qkv_split;
36pub mod repeat_tiled;
37pub mod quantized_matmul;
38pub mod quantized_matmul_ggml;
39pub mod quantized_matmul_id;
40pub mod quantized_matmul_id_ggml;
41pub mod rms_norm;
42pub mod rope;
43pub mod rope_multi;
44pub mod rope_train;
45pub mod vision_2d_rope;
46pub mod scale_mask_softmax;
47pub mod sigmoid_mul;
48pub mod silu_mul;
49pub mod compute_g_beta;
50pub mod ssm_norm_gate;
51pub mod flash_attn_prefill;
52pub mod flash_attn_prefill_blk;
53pub mod flash_attn_train;
54pub mod flash_attn_prefill_d512;
55pub mod flash_attn_prefill_mask;
56pub mod flash_attn_vec;
57pub mod flash_attn_vec_tq;
58pub mod flash_attn_vec_tq_hb;
59pub mod fwht_standalone;
60pub mod chunk_gated_delta_rule;
61pub mod chunk_gated_delta_rule_tri_solve_invert;
62pub mod gated_delta_net;
63pub mod gated_delta_net_decode;
64pub mod gated_delta_net_chunk;
65pub mod gated_delta_net_chunk_o;
66pub mod gated_delta_net_kkt;
67pub mod gated_delta_net_recompute_wu;
68pub mod tq_dequantize_kv;
69pub mod sdpa;
70pub mod sdpa_decode;
71pub mod sdpa_sliding;
72pub mod softcap;
73pub mod softmax;
74pub mod softmax_backward;
75pub mod softmax_sample;
76pub mod ssm_conv;
77pub mod conv1d_depthwise_causal;
78pub mod exp_elementwise;
79pub mod outer_product;
80pub mod take_along_axis;
81pub mod divide_elementwise;
82pub mod sqrt_elementwise;
83pub mod transpose;
84pub mod tri_solve;
85pub mod qdq_legacy;
86pub mod rms_norm_backward;
87pub mod slice_concat_2d;
88pub mod silu_backward;
89pub mod embedding_autograd;
90pub mod adam_update;
91pub mod qdq_affine;
92pub mod qmm_affine;
93pub mod im2col_2d_3ch;
95pub mod add_bias_row_2d;
96pub mod bilinear_resize_2d;
97pub mod block_merge_2x2;
98pub mod feature_concat;