Skip to main content

mlx_native/ops/
mod.rs

1//! GPU kernel host-side dispatch functions.
2//!
3//! Each submodule implements dispatch for a specific kernel family.
4
// Public submodule declarations for the `ops` module.
//
// Every entry below is an out-of-line `pub mod name;` declaration, so the
// module body is loaded from a sibling source file (`name.rs` or
// `name/mod.rs`) next to this file, and the module is exported as part of this
// module's public API.
//
// NOTE(review): the list is not alphabetically sorted (e.g. `top_k` follows
// `argsort`, `encode_helpers` follows `hadamard_quantize_kv`, `repeat_tiled`
// sits between `qkv_split` and `quantized_matmul`). The order looks
// historical or loosely grouped by feature. Before re-sorting, confirm that
// no submodule depends on textual `macro_rules!` ordering.

// General-purpose ops (selection, copy, scans, dense matmul, elementwise,
// gather, ...) — grouping inferred from names only; verify against the
// submodules.
5pub mod argmax;
6pub mod argsort;
7pub mod top_k;
8pub mod copy;
9pub mod cumsum;
10pub mod dense_gemm;
11pub mod dense_gemv_bf16;
12pub mod dense_mm_bf16;
13pub mod dense_mm_f16;
14pub mod dense_mm_f32_f32;
15pub mod elementwise;
16pub mod embedding;
17pub mod gather;
// NOTE(review): `gather_bench` is declared unconditionally (no `#[cfg(...)]`
// attribute on this line). The name suggests benchmark-only code — confirm it
// is meant to ship in the public API.
18pub mod gather_bench;
19pub mod hadamard;
20pub mod hadamard_quantize_kv;
21pub mod encode_helpers;
22pub mod fused_head_norm_rope;
23pub mod fused_norm_add;
24pub mod fused_residual_norm;
25pub mod gelu;
26pub mod kv_cache_copy;
27pub mod l2_norm;
// `moe_*` family — presumably mixture-of-experts routing/reduction; TODO
// confirm.
28pub mod moe_dispatch;
29pub mod moe_gate;
30pub mod moe_softmax_topk;
31pub mod moe_weighted_reduce;
32pub mod qkv_split;
33pub mod repeat_tiled;
// `quantized_matmul*` family (plain, `_ggml`, `_id`, `_id_ggml` variants).
34pub mod quantized_matmul;
35pub mod quantized_matmul_ggml;
36pub mod quantized_matmul_id;
37pub mod quantized_matmul_id_ggml;
38pub mod rms_norm;
// `rope*` family (including `vision_2d_rope`).
39pub mod rope;
40pub mod rope_multi;
41pub mod vision_2d_rope;
42pub mod scale_mask_softmax;
43pub mod sigmoid_mul;
44pub mod silu_mul;
45pub mod compute_g_beta;
46pub mod ssm_norm_gate;
// `flash_attn_*` family (prefill variants and vector variants).
47pub mod flash_attn_prefill;
48pub mod flash_attn_prefill_blk;
49pub mod flash_attn_prefill_d512;
50pub mod flash_attn_prefill_mask;
51pub mod flash_attn_vec;
52pub mod flash_attn_vec_tq;
53pub mod flash_attn_vec_tq_hb;
54pub mod fwht_standalone;
// `chunk_gated_delta_rule*` / `gated_delta_net*` family.
55pub mod chunk_gated_delta_rule;
56pub mod chunk_gated_delta_rule_tri_solve_invert;
57pub mod gated_delta_net;
58pub mod gated_delta_net_decode;
59pub mod gated_delta_net_chunk;
60pub mod gated_delta_net_chunk_o;
61pub mod gated_delta_net_kkt;
62pub mod gated_delta_net_recompute_wu;
63pub mod tq_dequantize_kv;
// `sdpa*` family, followed by softcap/softmax/sampling and remaining utility
// ops — grouping inferred from names only.
64pub mod sdpa;
65pub mod sdpa_decode;
66pub mod sdpa_sliding;
67pub mod softcap;
68pub mod softmax;
69pub mod softmax_sample;
70pub mod ssm_conv;
71pub mod transpose;
72pub mod tri_solve;