// mlx_native/ops/mod.rs

1//! GPU kernel host-side dispatch functions.
2//!
3//! Each submodule implements dispatch for a specific kernel family.
4
5pub mod argmax;
6pub mod argsort;
7pub mod top_k;
8pub mod copy;
9pub mod cumsum;
10pub mod dense_gemm;
11pub mod dense_gemv_bf16;
12pub mod dense_mm_bf16;
13pub mod dense_mm_f32_f32;
14pub mod elementwise;
15pub mod embedding;
16pub mod gather;
17pub mod gather_bench;
18pub mod hadamard;
19pub mod hadamard_quantize_kv;
20pub mod encode_helpers;
21pub mod fused_head_norm_rope;
22pub mod fused_norm_add;
23pub mod fused_residual_norm;
24pub mod gelu;
25pub mod kv_cache_copy;
26pub mod l2_norm;
27pub mod moe_dispatch;
28pub mod moe_gate;
29pub mod moe_softmax_topk;
30pub mod moe_weighted_reduce;
31pub mod quantized_matmul;
32pub mod quantized_matmul_ggml;
33pub mod quantized_matmul_id;
34pub mod quantized_matmul_id_ggml;
35pub mod rms_norm;
36pub mod rope;
37pub mod rope_multi;
38pub mod vision_2d_rope;
39pub mod scale_mask_softmax;
40pub mod sigmoid_mul;
41pub mod silu_mul;
42pub mod compute_g_beta;
43pub mod ssm_norm_gate;
44pub mod flash_attn_prefill;
45pub mod flash_attn_prefill_blk;
46pub mod flash_attn_prefill_d512;
47pub mod flash_attn_prefill_mask;
48pub mod flash_attn_vec;
49pub mod flash_attn_vec_tq;
50pub mod flash_attn_vec_tq_hb;
51pub mod fwht_standalone;
52pub mod gated_delta_net;
53pub mod tq_dequantize_kv;
54pub mod sdpa;
55pub mod sdpa_decode;
56pub mod sdpa_sliding;
57pub mod softcap;
58pub mod softmax;
59pub mod softmax_sample;
60pub mod ssm_conv;
61pub mod transpose;
62pub mod tri_solve;