List of all items
Structs
- BufferRange
- CommandEncoder
- DispatchRecord
- EncoderSession
- KernelRegistry
- MTLSize
- MemRanges
- MlxBuffer
- MlxBufferPool
- MlxDevice
- encoder_worker::EncoderWorker
- gguf::GgufFile
- gguf::TensorInfo
- graph::ComputeGraph
- graph::ConflictTracker
- graph::GraphExecutor
- graph::GraphSession
- graph::MlxBuffer
- kernel_profile::DispatchEntry
- kernel_profile::ProfileEntry
- metal_capture::MetalCapture
- ops::chunk_gated_delta_rule::ChunkGatedDeltaRuleParams
- ops::chunk_gated_delta_rule::ChunkInternalArena
- ops::chunk_gated_delta_rule_tri_solve_invert::ChunkTriSolveInvertParams
- ops::copy::StridedCopyParams
- ops::dense_gemm::DenseGemmF16Params
- ops::dense_mm_bf16::DenseMmBf16F32Params
- ops::dense_mm_f16::DenseMmF16F32Params
- ops::dense_mm_f32_f32::DenseMmF32F32Params
- ops::embedding::EmbeddingGatherParams
- ops::flash_attn_prefill::AttnMaskParamsGpu
- ops::flash_attn_prefill::AttnParamsGpu
- ops::flash_attn_prefill::FlashAttnPrefillParams
- ops::flash_attn_prefill::FlashAttnPrefillResumeParams
- ops::flash_attn_prefill_blk::BlkParams
- ops::flash_attn_prefill_mask::SdpaMaskParams
- ops::flash_attn_train::FlashAttnTrainParams
- ops::flash_attn_vec::FlashAttnVecParams
- ops::flash_attn_vec_peer_port_f16::FlashAttnVecPeerPortParams
- ops::flash_attn_vec_peer_port_f16::FlashAttnVecPeerPortReduceParams
- ops::flash_attn_vec_tq::FlashAttnVecTqParams
- ops::flash_attn_vec_tq_hb::FlashAttnVecTqHbParams
- ops::gated_delta_net::GatedDeltaNetParams
- ops::gated_delta_net_chunk::GatedDeltaNetChunkParams
- ops::gated_delta_net_chunk_o::GatedDeltaNetChunkOParams
- ops::gated_delta_net_kkt::GatedDeltaNetKktParams
- ops::gated_delta_net_recompute_wu::GatedDeltaNetRecomputeWuParams
- ops::moe_dispatch::ExpertWeights
- ops::moe_dispatch::MoeDispatchParams
- ops::moe_gate::MoeGateParams
- ops::mul_mv_ext::MulMvExtParams
- ops::qkv_split::QkvSplitParams
- ops::quantized_matmul::QuantizedMatmulParams
- ops::quantized_matmul_ggml::GgmlQuantizedMatmulParams
- ops::quantized_matmul_ggml::GgmlQuantizedMatmulPerm021Params
- ops::quantized_matmul_id::QuantizedMatmulIdParams
- ops::quantized_matmul_id_ggml::GgmlIdMmDispatchParams
- ops::quantized_matmul_id_ggml::GgmlQuantizedMatmulIdParams
- ops::quantized_matmul_id_ggml::IdMmScratch
- ops::repeat_tiled::RepeatTiledParams
- ops::rope_multi::RopeMultiBufferPack
- ops::rope_multi::RopeMultiParams
- ops::rope_train::RopeTrainParams
- ops::scale_mask_softmax::ScaleMaskSoftmaxParams
- ops::sdpa::SdpaParams
- ops::sdpa_sliding::SdpaSlidingParams
- ops::ssm_conv::SsmConvParams
- ops::tri_solve::TriSolveParams
- tq_oracle::TqHbOracleParams
- turboquant::TurboQuantConfig
- weight::QuantizationConfig
- weight::QuantizedWeight
- weight::SafetensorsFile
- weight::TensorQuantConfig
Enums
- CapturedNode
- CapturedOpKind
- DType
- DispatchKind
- KernelArg
- MemRangeRole
- MlxError
- RecordedBinding
- gguf::MetadataValue
- graph::DType
- graph::OpKind
- ops::elementwise::CastDirection
- ops::encode_helpers::KernelArg
- ops::flash_attn_prefill::FlashAttnPrefillLayout
- ops::quantized_matmul_ggml::GgmlType
- ops::rope_multi::RopeMultiMode
- turboquant::BitWidth
Functions
- auto_barrier_concurrent_count
- auto_barrier_count
- barrier_count
- barrier_total_ns
- cmd_buf_count
- dispatch_count
- kernel_profile::convert_gpu_ticks_to_ns
- kernel_profile::dump
- kernel_profile::dump_dispatches
- kernel_profile::is_dispatch_enabled
- kernel_profile::is_enabled
- kernel_profile::record
- kernel_profile::record_clock_pair
- kernel_profile::record_dispatch
- kernel_profile::reset
- ops::adam_update::dispatch_adam_update_f32
- ops::adam_update::register
- ops::add_bias_row_2d::dispatch_add_bias_row_2d_f32
- ops::add_bias_row_2d::register
- ops::argmax::dispatch_argmax_f32
- ops::argmax::register
- ops::argsort::dispatch_argsort_desc_f32
- ops::argsort::register
- ops::bilinear_resize_2d::dispatch_bilinear_resize_2d_f32
- ops::bilinear_resize_2d::register
- ops::block_merge_2x2::dispatch_block_merge_2x2_f32
- ops::block_merge_2x2::register
- ops::chunk_gated_delta_rule::dispatch_chunk_gated_delta_rule_fwd
- ops::chunk_gated_delta_rule::dispatch_chunk_gated_delta_rule_fwd_with_arena
- ops::chunk_gated_delta_rule::register
- ops::chunk_gated_delta_rule_tri_solve_invert::build_chunk_tri_solve_invert_params
- ops::chunk_gated_delta_rule_tri_solve_invert::dispatch_chunk_tri_solve_invert
- ops::chunk_gated_delta_rule_tri_solve_invert::register
- ops::compute_g_beta::compute_g_beta_gpu
- ops::compute_g_beta::dispatch_compute_g_beta
- ops::compute_g_beta::register
- ops::conv1d_depthwise_causal::dispatch_conv1d_depthwise_causal_backward_dw_f32
- ops::conv1d_depthwise_causal::dispatch_conv1d_depthwise_causal_backward_dx_f32
- ops::conv1d_depthwise_causal::dispatch_conv1d_depthwise_causal_forward_f32
- ops::conv1d_depthwise_causal::register
- ops::copy::dispatch_copy_f32
- ops::copy::dispatch_strided_copy_f32
- ops::copy::register
- ops::cumsum::dispatch_cumsum
- ops::cumsum::register
- ops::dense_gemm::dispatch_dense_gemm_f16
- ops::dense_gemm::dispatch_dense_matvec_bf16w_f32io
- ops::dense_gemm::dispatch_dense_matvec_f16w_f32io
- ops::dense_gemm::dispatch_dense_matvec_f32
- ops::dense_gemm::register
- ops::dense_gemv_bf16::dense_gemv_bf16_f32
- ops::dense_gemv_bf16::register
- ops::dense_mm_bf16::dense_matmul_bf16_f32_tensor
- ops::dense_mm_f16::dense_matmul_f16_f32_tensor
- ops::dense_mm_f32_f32::dense_matmul_f32_f32_tensor
- ops::dequant_to_f16::dispatch_dequant_to_f16
- ops::dequant_to_f16::materialize_f16_shadow
- ops::divide_elementwise::dispatch_divide_backward_f32
- ops::divide_elementwise::dispatch_divide_f32
- ops::divide_elementwise::register
- ops::elementwise::cast
- ops::elementwise::dispatch_cast_bf16_to_f32_with_encoder
- ops::elementwise::dispatch_cast_f32_to_bf16_with_encoder
- ops::elementwise::dispatch_scalar_mul_bf16_with_encoder
- ops::elementwise::elementwise_add
- ops::elementwise::elementwise_mul
- ops::elementwise::embedding_gather_scale_batch_f32
- ops::elementwise::embedding_gather_scale_f32
- ops::elementwise::scalar_mul_bf16
- ops::elementwise::scalar_mul_f32
- ops::embedding::embedding_gather
- ops::embedding_autograd::dispatch_embedding_lookup_f32
- ops::embedding_autograd::dispatch_embedding_scatter_add_f32
- ops::embedding_autograd::register
- ops::encode_helpers::as_bytes
- ops::encode_helpers::encode_threadgroups_with_args
- ops::encode_helpers::encode_threadgroups_with_args_and_shared
- ops::encode_helpers::encode_with_args
- ops::exp_elementwise::dispatch_exp_backward_f32
- ops::exp_elementwise::dispatch_exp_f32
- ops::exp_elementwise::register
- ops::feature_concat::dispatch_feature_concat_f32
- ops::feature_concat::register
- ops::flash_attn_prefill::dispatch_flash_attn_prefill_bf16_d256
- ops::flash_attn_prefill::dispatch_flash_attn_prefill_bf16_d256_resume
- ops::flash_attn_prefill::dispatch_flash_attn_prefill_bf16_d256_with_blk
- ops::flash_attn_prefill::dispatch_flash_attn_prefill_bf16_d64
- ops::flash_attn_prefill::dispatch_flash_attn_prefill_f16_d256_resume
- ops::flash_attn_prefill::register
- ops::flash_attn_prefill_blk::alloc_blk_buffer
- ops::flash_attn_prefill_blk::blk_buffer_byte_len
- ops::flash_attn_prefill_blk::dispatch_flash_attn_prefill_blk
- ops::flash_attn_prefill_blk::register
- ops::flash_attn_prefill_d512::dispatch_flash_attn_prefill_bf16_d512
- ops::flash_attn_prefill_d512::dispatch_flash_attn_prefill_bf16_d512_resume
- ops::flash_attn_prefill_d512::dispatch_flash_attn_prefill_bf16_d512_with_blk
- ops::flash_attn_prefill_d512::dispatch_flash_attn_prefill_bf16_d512_with_nsg
- ops::flash_attn_prefill_d512::dispatch_flash_attn_prefill_bf16_d512_with_nsg_and_blk
- ops::flash_attn_prefill_d512::dispatch_flash_attn_prefill_f16_d512_resume
- ops::flash_attn_prefill_d512::register
- ops::flash_attn_prefill_mask::build_sdpa_mask_bf16
- ops::flash_attn_prefill_mask::register
- ops::flash_attn_train::dispatch_flash_attn_train_bwd_bf16_d256
- ops::flash_attn_train::dispatch_flash_attn_train_bwd_bf16_d64
- ops::flash_attn_train::dispatch_flash_attn_train_fwd_bf16_d256
- ops::flash_attn_train::dispatch_flash_attn_train_fwd_bf16_d64
- ops::flash_attn_train::register
- ops::flash_attn_train::register_bwd
- ops::flash_attn_vec::flash_attn_vec
- ops::flash_attn_vec::register
- ops::flash_attn_vec::tmp_buffer_bytes
- ops::flash_attn_vec_hybrid::flash_attn_vec_hybrid
- ops::flash_attn_vec_hybrid::register
- ops::flash_attn_vec_peer_port_f16::flash_attn_vec_peer_port_f16
- ops::flash_attn_vec_peer_port_f16::flash_attn_vec_peer_port_f16_nwg32
- ops::flash_attn_vec_peer_port_f16::flash_attn_vec_peer_port_f16_nwg32_tmp_bytes
- ops::flash_attn_vec_peer_port_f16::register
- ops::flash_attn_vec_reduce_tq_hb_undo::dispatch_flash_attn_vec_reduce_tq_hb_undo
- ops::flash_attn_vec_reduce_tq_hb_undo::register
- ops::flash_attn_vec_tq::flash_attn_vec_tq
- ops::flash_attn_vec_tq::register
- ops::flash_attn_vec_tq::tmp_buffer_bytes
- ops::flash_attn_vec_tq_hb::compute_nsg
- ops::flash_attn_vec_tq_hb::flash_attn_vec_tq_hb
- ops::flash_attn_vec_tq_hb::flash_attn_vec_tq_hb_with_fused_undo
- ops::flash_attn_vec_tq_hb::register
- ops::flash_attn_vec_tq_hb::tmp_buffer_bytes
- ops::fused_head_norm_rope::dispatch_fused_head_norm_rope_batch_bf16
- ops::fused_head_norm_rope::dispatch_fused_head_norm_rope_batch_f32
- ops::fused_head_norm_rope::dispatch_fused_head_norm_rope_batch_f32_with_bf16
- ops::fused_head_norm_rope::dispatch_fused_head_norm_rope_batch_f32_with_bf16_f32_perm
- ops::fused_head_norm_rope::dispatch_fused_head_norm_rope_bf16
- ops::fused_head_norm_rope::dispatch_fused_head_norm_rope_f32
- ops::fused_head_norm_rope::register
- ops::fused_norm_add::dispatch_fused_moe_routing_batch_f32
- ops::fused_norm_add::dispatch_fused_moe_routing_f32
- ops::fused_norm_add::dispatch_fused_moe_wsum_dnorm_add_f32
- ops::fused_norm_add::dispatch_fused_moe_wsum_norm_add_f32
- ops::fused_norm_add::dispatch_fused_norm_add_bf16
- ops::fused_norm_add::dispatch_fused_norm_add_f32
- ops::fused_norm_add::dispatch_fused_norm_add_no_weight_bf16
- ops::fused_norm_add::dispatch_fused_norm_add_scalar_f32
- ops::fused_norm_add::dispatch_fused_residual_norm_f32
- ops::fused_norm_add::dispatch_fused_residual_norm_scalar_f32
- ops::fused_norm_add::register
- ops::fused_residual_norm::dispatch_fused_residual_norm_bf16
- ops::fused_residual_norm::register
- ops::fwht_standalone::dispatch_fwht_f32
- ops::fwht_standalone::dispatch_fwht_sign_premult_f32
- ops::fwht_standalone::dispatch_fwht_sign_undo_f32
- ops::gated_delta_net::build_gated_delta_net_params
- ops::gated_delta_net::cpu_reference_f32
- ops::gated_delta_net::dispatch_gated_delta_net
- ops::gated_delta_net::register
- ops::gated_delta_net_chunk::build_gated_delta_net_chunk_params
- ops::gated_delta_net_chunk::dispatch_gated_delta_net_chunk_inter_state
- ops::gated_delta_net_chunk::register
- ops::gated_delta_net_chunk_o::build_gated_delta_net_chunk_o_params
- ops::gated_delta_net_chunk_o::dispatch_gated_delta_net_chunk_o
- ops::gated_delta_net_chunk_o::register
- ops::gated_delta_net_decode::dispatch_gated_delta_net_decode
- ops::gated_delta_net_decode::register
- ops::gated_delta_net_kkt::build_gated_delta_net_kkt_params
- ops::gated_delta_net_kkt::dispatch_gated_delta_net_kkt
- ops::gated_delta_net_kkt::register
- ops::gated_delta_net_recompute_wu::build_gated_delta_net_recompute_wu_params
- ops::gated_delta_net_recompute_wu::dispatch_gated_delta_net_recompute_wu
- ops::gated_delta_net_recompute_wu::register
- ops::gather::dispatch_gather_f32
- ops::gather::register
- ops::gather_bench::dispatch_gather_f16_seq
- ops::gather_bench::dispatch_gather_nibble
- ops::gather_bench::register
- ops::gelu::dispatch_gelu
- ops::gelu::register
- ops::hadamard::dispatch_hadamard_transform
- ops::hadamard::register
- ops::hadamard_quantize_kv::dispatch_hadamard_quantize_kv
- ops::hadamard_quantize_kv::dispatch_hadamard_quantize_kv_fast_dual
- ops::hadamard_quantize_kv::dispatch_hadamard_quantize_kv_hb
- ops::hadamard_quantize_kv::dispatch_hadamard_quantize_kv_hb_dual
- ops::hadamard_quantize_kv::dispatch_hadamard_quantize_kv_hb_seq
- ops::hadamard_quantize_kv::dispatch_hadamard_quantize_kv_seq
- ops::hadamard_quantize_kv::dispatch_kv_copy_kf16_quantize_v_no_fwht
- ops::hadamard_quantize_kv::dispatch_kv_quantize_v_no_fwht
- ops::hadamard_quantize_kv::dispatch_kv_quantize_v_no_fwht_seq
- ops::hadamard_quantize_kv::register
- ops::im2col_2d_3ch::dispatch_im2col_2d_3ch_f32
- ops::im2col_2d_3ch::register
- ops::kv_cache_copy::dispatch_kv_cache_copy
- ops::kv_cache_copy::dispatch_kv_cache_copy_batch_f32
- ops::kv_cache_copy::dispatch_kv_cache_copy_batch_f32_kv_dual
- ops::kv_cache_copy::dispatch_kv_cache_copy_batch_f32_to_f16
- ops::kv_cache_copy::dispatch_kv_cache_copy_batch_f32_to_f16_kv_dual
- ops::kv_cache_copy::dispatch_kv_cache_copy_f32
- ops::kv_cache_copy::dispatch_kv_cache_copy_seq_bf16
- ops::kv_cache_copy::dispatch_kv_cache_copy_seq_bf16_to_bf16_head_major
- ops::kv_cache_copy::dispatch_kv_cache_copy_seq_f32
- ops::kv_cache_copy::dispatch_kv_cache_copy_seq_f32_dual
- ops::kv_cache_copy::dispatch_kv_cache_copy_seq_f32_to_f16
- ops::kv_cache_copy::dispatch_kv_cache_copy_seq_f32_to_f16_dual
- ops::kv_cache_copy::register
- ops::l2_norm::dispatch_l2_norm
- ops::l2_norm::dispatch_l2_norm_scale_f32
- ops::l2_norm::register
- ops::log_elementwise::dispatch_log_backward_f32
- ops::log_elementwise::dispatch_log_f32
- ops::log_elementwise::register
- ops::moe_dispatch::fused_gelu_mul_bf16_encode
- ops::moe_dispatch::moe_accumulate_encode
- ops::moe_dispatch::moe_accumulate_encode_offset
- ops::moe_dispatch::moe_dispatch
- ops::moe_dispatch::moe_gather_topk_weights_encode
- ops::moe_dispatch::moe_swiglu_batch_encode
- ops::moe_dispatch::moe_swiglu_fused_encode
- ops::moe_dispatch::moe_swiglu_fused_encode_offset
- ops::moe_dispatch::moe_swiglu_seq_backward_encode
- ops::moe_dispatch::moe_swiglu_seq_bf16_encode
- ops::moe_dispatch::moe_swiglu_seq_encode
- ops::moe_dispatch::moe_weighted_sum_encode
- ops::moe_dispatch::moe_weighted_sum_seq_backward_outputs_encode
- ops::moe_dispatch::moe_weighted_sum_seq_backward_weights_encode
- ops::moe_dispatch::moe_weighted_sum_seq_bf16_input_encode
- ops::moe_dispatch::moe_weighted_sum_seq_encode
- ops::moe_dispatch::moe_zero_buffer_encode
- ops::moe_gate::moe_gate
- ops::moe_softmax_topk::dispatch_moe_softmax_topk
- ops::moe_softmax_topk::register
- ops::moe_weighted_reduce::dispatch_moe_weighted_reduce
- ops::moe_weighted_reduce::register
- ops::mul_mv_ext::mul_mv_ext_dispatch
- ops::outer_product::dispatch_outer_product_backward_lhs_f32
- ops::outer_product::dispatch_outer_product_backward_rhs_f32
- ops::outer_product::dispatch_outer_product_f32
- ops::outer_product::register
- ops::qdq_affine::dispatch_qdq_affine_backward_biases_f32
- ops::qdq_affine::dispatch_qdq_affine_backward_scales_f32
- ops::qdq_affine::dispatch_qdq_affine_forward_f32
- ops::qdq_affine::dispatch_qdq_affine_init_f32
- ops::qdq_affine::register
- ops::qdq_legacy::dispatch_qdq_q4_0_f32
- ops::qdq_legacy::dispatch_qdq_q8_0_f32
- ops::qdq_legacy::register
- ops::qkv_split::dispatch_qkv_split_f32
- ops::qkv_split::register
- ops::qmm_affine::dispatch_qmm_affine_t_f32
- ops::qmm_affine::dispatch_qmm_affine_t_f32_simd
- ops::qmm_affine::dispatch_qmm_affine_t_f32_simd4
- ops::qmm_affine::dispatch_qmm_affine_t_f32_simd4_gs64
- ops::qmm_affine::dispatch_qmm_affine_t_f32_tiled
- ops::qmm_affine::dispatch_qmm_affine_t_packed_simd4_b4
- ops::qmm_affine::register
- ops::quantized_matmul::dispatch_quantized_matmul_simd_bf16
- ops::quantized_matmul::dispatch_quantized_matmul_simd_bf16_expert
- ops::quantized_matmul::quantized_matmul
- ops::quantized_matmul::quantized_matmul_simd
- ops::quantized_matmul_ggml::build_q6k_nr2_m1_record
- ops::quantized_matmul_ggml::dispatch_mm_v2_f16
- ops::quantized_matmul_ggml::quantized_matmul_ggml
- ops::quantized_matmul_ggml::quantized_matmul_mm_tensor_perm021
- ops::quantized_matmul_ggml::quantized_matmul_mm_tensor_perm021_f16
- ops::quantized_matmul_id::quantized_matmul_id
- ops::quantized_matmul_id::quantized_matmul_id_into
- ops::quantized_matmul_id_ggml::build_q6k_id_nr2_m1_record
- ops::quantized_matmul_id_ggml::build_q8_0_id_decode_record
- ops::quantized_matmul_id_ggml::quantized_matmul_id_ggml
- ops::quantized_matmul_id_ggml::quantized_matmul_id_ggml_pooled
- ops::quantized_matmul_id_ggml::quantized_matmul_id_swiglu_q4_0
- ops::repeat_tiled::dispatch_repeat_tiled_f32
- ops::repeat_tiled::register
- ops::rms_norm::build_rms_norm_decode_record
- ops::rms_norm::dispatch_fused_moe_wsum_post_ff_norm2_endlayer_f32_v2
- ops::rms_norm::dispatch_fused_post_attn_triple_norm_f32
- ops::rms_norm::dispatch_fused_post_ff_norm2_endlayer_f32
- ops::rms_norm::dispatch_rms_norm
- ops::rms_norm::dispatch_rms_norm_f32_triple
- ops::rms_norm::dispatch_rms_norm_mul
- ops::rms_norm::dispatch_rms_norm_no_scale_bf16
- ops::rms_norm::dispatch_rms_norm_no_scale_f32
- ops::rms_norm::register
- ops::rms_norm_backward::dispatch_rms_norm_backward_dw
- ops::rms_norm_backward::dispatch_rms_norm_backward_dx
- ops::rms_norm_backward::dispatch_rms_norm_compute_rms_inv
- ops::rms_norm_backward::register
- ops::rope::dispatch_rope
- ops::rope::dispatch_rope_neox_bf16
- ops::rope::dispatch_rope_neox_f32
- ops::rope::register
- ops::rope_multi::build_rope_multi_buffers
- ops::rope_multi::clear_rope_pack_cache
- ops::rope_multi::dispatch_rope_multi
- ops::rope_multi::dispatch_rope_multi_cached
- ops::rope_multi::register
- ops::rope_multi::rope_pack_cache_len
- ops::rope_train::dispatch_rope_backward_bf16
- ops::rope_train::dispatch_rope_backward_f32
- ops::rope_train::dispatch_rope_forward_bf16
- ops::rope_train::dispatch_rope_forward_f32
- ops::row_sum::dispatch_row_sum_backward_f32
- ops::row_sum::dispatch_row_sum_f32
- ops::row_sum::register
- ops::scale_mask_softmax::dispatch_scale_mask_softmax_f32
- ops::sdpa::register
- ops::sdpa::sdpa
- ops::sdpa_decode::dispatch_sdpa_decode
- ops::sdpa_decode::register
- ops::sdpa_sliding::register
- ops::sdpa_sliding::sdpa_sliding
- ops::sigmoid_mul::dispatch_sigmoid_mul
- ops::sigmoid_mul::register
- ops::silu_backward::dispatch_silu_backward_f32
- ops::silu_backward::dispatch_silu_f32
- ops::silu_backward::register
- ops::silu_mul::dispatch_silu_mul
- ops::silu_mul::register
- ops::silu_mul::silu_mul_gpu
- ops::slice_concat_2d::dispatch_copy_2d_cols_into_f32
- ops::slice_concat_2d::dispatch_slice_2d_cols_f32
- ops::slice_concat_2d::register
- ops::softcap::dispatch_softcap
- ops::softcap::register
- ops::softmax::dispatch_softmax
- ops::softmax::register
- ops::softmax_backward::dispatch_softmax_backward
- ops::softmax_backward::register
- ops::softmax_sample::dispatch_softmax_sample_f32
- ops::softmax_sample::register
- ops::sqrt_elementwise::dispatch_sqrt_backward_f32
- ops::sqrt_elementwise::dispatch_sqrt_f32
- ops::sqrt_elementwise::register
- ops::ssm_conv::dispatch_ssm_conv
- ops::ssm_conv::register
- ops::ssm_norm_gate::build_ssm_norm_gate_params
- ops::ssm_norm_gate::dispatch_ssm_norm_gate
- ops::ssm_norm_gate::register
- ops::take_along_axis::dispatch_take_along_axis_backward_f32
- ops::take_along_axis::dispatch_take_along_axis_f32
- ops::take_along_axis::register
- ops::top_k::dispatch_top_k_f32
- ops::top_k::register
- ops::tq_dequantize_kv::dispatch_tq_dequantize_hb_kv
- ops::tq_dequantize_kv::dispatch_tq_dequantize_hb_kv_seq
- ops::tq_dequantize_kv::dispatch_tq_dequantize_kv
- ops::tq_dequantize_kv::register
- ops::transpose::permute_021_bf16
- ops::transpose::permute_021_bf16_to_f32
- ops::transpose::permute_021_f32
- ops::transpose::transpose_2d
- ops::transpose::transpose_last2_bf16
- ops::transpose::transpose_last2_f16
- ops::tri_solve::dispatch_tri_solve
- ops::tri_solve::register
- ops::vision_2d_rope::build_vision_2d_rope_params
- ops::vision_2d_rope::dispatch_vision_2d_rope
- ops::vision_2d_rope::register
- pipeline_dispatch_buckets
- reset_counters
- reset_pipeline_dispatch_buckets
- sync_count
- tq_oracle::flash_attn_vec_tq_hb_oracle
- turboquant::apply_d1_sign_mask_inplace
- turboquant::compute_lloyd_max_beta_codebook
- turboquant::compute_lloyd_max_codebook
- turboquant::fwht_inplace
- turboquant::hb_centroid
- turboquant::hb_nearest_centroid
- turboquant::turboquant_dequantize
- turboquant::turboquant_hb_encode_d256
- turboquant::turboquant_quantize
- weight::load_quantized_weights
- weight::safetensors_to_metal_buffer
Type Aliases
Statics
- ops::adam_update::ADAM_UPDATE_SHADER_SOURCE
- ops::add_bias_row_2d::ADD_BIAS_ROW_2D_SHADER_SOURCE
- ops::argmax::ARGMAX_SHADER_SOURCE
- ops::argsort::ARGSORT_SHADER_SOURCE
- ops::bilinear_resize_2d::BILINEAR_RESIZE_2D_SHADER_SOURCE
- ops::block_merge_2x2::BLOCK_MERGE_2X2_SHADER_SOURCE
- ops::chunk_gated_delta_rule::CHUNK_LOCAL_CUMSUM_G_SHADER_SOURCE
- ops::chunk_gated_delta_rule_tri_solve_invert::CHUNK_TRI_SOLVE_INVERT_SHADER_SOURCE
- ops::compute_g_beta::COMPUTE_G_BETA_SHADER_SOURCE
- ops::conv1d_depthwise_causal::CONV1D_DEPTHWISE_CAUSAL_SHADER_SOURCE
- ops::copy::COPY_SHADER_SOURCE
- ops::cumsum::CUMSUM_SHADER_SOURCE
- ops::dense_gemm::DENSE_GEMM_SHADER_SOURCE
- ops::dense_gemv_bf16::DENSE_GEMV_BF16_SHADER_SOURCE
- ops::divide_elementwise::DIVIDE_ELEMENTWISE_SHADER_SOURCE
- ops::embedding_autograd::EMBEDDING_AUTOGRAD_SHADER_SOURCE
- ops::exp_elementwise::EXP_ELEMENTWISE_SHADER_SOURCE
- ops::feature_concat::FEATURE_CONCAT_SHADER_SOURCE
- ops::flash_attn_prefill::FLASH_ATTN_PREFILL_SHADER_SOURCE
- ops::flash_attn_prefill_blk::FLASH_ATTN_PREFILL_BLK_SHADER_SOURCE
- ops::flash_attn_prefill_d512::FLASH_ATTN_PREFILL_D512_SHADER_SOURCE
- ops::flash_attn_prefill_mask::FLASH_ATTN_PREFILL_MASK_SHADER_SOURCE
- ops::flash_attn_train::FLASH_ATTN_TRAIN_BWD_COMPUTE_D_SHADER_SOURCE
- ops::flash_attn_train::FLASH_ATTN_TRAIN_BWD_SHADER_SOURCE
- ops::flash_attn_train::FLASH_ATTN_TRAIN_FWD_SHADER_SOURCE
- ops::flash_attn_vec::FLASH_ATTN_VEC_SHADER_SOURCE
- ops::flash_attn_vec_hybrid::FLASH_ATTN_VEC_HYBRID_SHADER_SOURCE
- ops::flash_attn_vec_peer_port_f16::FLASH_ATTN_VEC_PEER_PORT_SHADER_SOURCE
- ops::flash_attn_vec_reduce_tq_hb_undo::FLASH_ATTN_VEC_REDUCE_TQ_HB_UNDO_SHADER_SOURCE
- ops::flash_attn_vec_tq::FLASH_ATTN_VEC_TQ_SHADER_SOURCE
- ops::flash_attn_vec_tq_hb::FLASH_ATTN_VEC_TQ_HB_SHADER_SOURCE
- ops::fused_head_norm_rope::FUSED_HEAD_NORM_ROPE_F32_SHADER_SOURCE
- ops::fused_head_norm_rope::FUSED_HEAD_NORM_ROPE_SHADER_SOURCE
- ops::fused_norm_add::FUSED_NORM_ADD_SHADER_SOURCE
- ops::fused_residual_norm::FUSED_RESIDUAL_NORM_SHADER_SOURCE
- ops::fwht_standalone::FWHT_STANDALONE_SHADER_SOURCE
- ops::gated_delta_net::GATED_DELTA_NET_SHADER_SOURCE
- ops::gated_delta_net_chunk::GATED_DELTA_NET_CHUNK_SHADER_SOURCE
- ops::gated_delta_net_chunk_o::GATED_DELTA_NET_CHUNK_O_SHADER_SOURCE
- ops::gated_delta_net_decode::GATED_DELTA_NET_DECODE_SHADER_SOURCE
- ops::gated_delta_net_kkt::GATED_DELTA_NET_KKT_SHADER_SOURCE
- ops::gated_delta_net_recompute_wu::GATED_DELTA_NET_RECOMPUTE_WU_SHADER_SOURCE
- ops::gather::GATHER_SHADER_SOURCE
- ops::gather_bench::GATHER_BENCH_SHADER_SOURCE
- ops::gelu::GELU_SHADER_SOURCE
- ops::hadamard::HADAMARD_SHADER_SOURCE
- ops::hadamard_quantize_kv::HADAMARD_QUANTIZE_KV_SHADER_SOURCE
- ops::im2col_2d_3ch::IM2COL_2D_3CH_SHADER_SOURCE
- ops::kv_cache_copy::KV_CACHE_COPY_SHADER_SOURCE
- ops::l2_norm::L2_NORM_SHADER_SOURCE
- ops::log_elementwise::LOG_SHADER_SOURCE
- ops::moe_softmax_topk::MOE_SOFTMAX_TOPK_SHADER_SOURCE
- ops::moe_weighted_reduce::MOE_WEIGHTED_REDUCE_SHADER_SOURCE
- ops::outer_product::OUTER_PRODUCT_SHADER_SOURCE
- ops::qdq_affine::QDQ_AFFINE_SHADER_SOURCE
- ops::qdq_legacy::QDQ_LEGACY_SHADER_SOURCE
- ops::qkv_split::QKV_SPLIT_SHADER_SOURCE
- ops::qmm_affine::QMM_AFFINE_SHADER_SOURCE
- ops::qmm_affine::QMM_AFFINE_SIMD4_GS64_SHADER_SOURCE
- ops::qmm_affine::QMM_AFFINE_SIMD4_SHADER_SOURCE
- ops::qmm_affine::QMM_AFFINE_SIMD_SHADER_SOURCE
- ops::qmm_affine::QMM_AFFINE_TILED_SHADER_SOURCE
- ops::qmm_affine::QMM_AFFINE_T_PACKED_SIMD4_B4_SHADER_SOURCE
- ops::repeat_tiled::REPEAT_TILED_SHADER_SOURCE
- ops::rms_norm::RMS_NORM_SHADER_SOURCE
- ops::rms_norm_backward::RMS_NORM_BACKWARD_SHADER_SOURCE
- ops::rope::ROPE_SHADER_SOURCE
- ops::rope_multi::ROPE_MULTI_SHADER_SOURCE
- ops::row_sum::ROW_SUM_SHADER_SOURCE
- ops::sdpa::SDPA_SHADER_SOURCE
- ops::sdpa_decode::SDPA_DECODE_SHADER_SOURCE
- ops::sdpa_sliding::SDPA_SLIDING_SHADER_SOURCE
- ops::sigmoid_mul::SIGMOID_MUL_SHADER_SOURCE
- ops::silu_backward::SILU_BACKWARD_SHADER_SOURCE
- ops::silu_mul::SILU_MUL_SHADER_SOURCE
- ops::slice_concat_2d::SLICE_CONCAT_2D_SHADER_SOURCE
- ops::softcap::SOFTCAP_SHADER_SOURCE
- ops::softmax::SOFTMAX_SHADER_SOURCE
- ops::softmax_sample::SOFTMAX_SAMPLE_SHADER_SOURCE
- ops::sqrt_elementwise::SQRT_ELEMENTWISE_SHADER_SOURCE
- ops::ssm_conv::SSM_CONV_SHADER_SOURCE
- ops::ssm_norm_gate::SSM_NORM_GATE_SHADER_SOURCE
- ops::take_along_axis::TAKE_ALONG_AXIS_SHADER_SOURCE
- ops::top_k::TOP_K_SHADER_SOURCE
- ops::tq_dequantize_kv::TQ_DEQUANTIZE_KV_SHADER_SOURCE
- ops::tri_solve::TRI_SOLVE_SHADER_SOURCE
- ops::vision_2d_rope::VISION_2D_ROPE_SHADER_SOURCE
Constants
- ops::chunk_gated_delta_rule::FIXED_BT
- ops::chunk_gated_delta_rule::L2_NORM_EPS
- ops::chunk_gated_delta_rule::MAX_K
- ops::chunk_gated_delta_rule::MAX_V
- ops::chunk_gated_delta_rule_tri_solve_invert::FIXED_BT
- ops::flash_attn_prefill_blk::FC_IDX_BK
- ops::flash_attn_prefill_blk::FC_IDX_BQ
- ops::flash_attn_prefill_blk::FC_IDX_HAS_BLK
- ops::flash_attn_prefill_blk::K_BLK_BF16
- ops::flash_attn_prefill_d512::ALL_KERNEL_NAMES
- ops::flash_attn_prefill_d512::FC_IDX_NSG
- ops::flash_attn_prefill_d512::K_LLAMACPP_BF16_D512
- ops::flash_attn_prefill_d512::K_LLAMACPP_BF16_D512_BOOLMASK
- ops::flash_attn_prefill_d512::K_LLAMACPP_F16_D512
- ops::flash_attn_prefill_d512::K_LLAMACPP_F16_D512_BOOLMASK
- ops::flash_attn_prefill_d512::NCPSG_D512
- ops::flash_attn_prefill_d512::NQPSG_D512
- ops::flash_attn_prefill_d512::NSG_D512
- ops::flash_attn_prefill_d512::TGMEM_BYTES_D512
- ops::flash_attn_prefill_mask::K_FILL_BF16
- ops::gated_delta_net::MAX_STATE_D
- ops::gated_delta_net_chunk::DEFAULT_BV
- ops::gated_delta_net_chunk::MAX_K
- ops::gated_delta_net_chunk::MAX_V
- ops::gated_delta_net_chunk_o::DEFAULT_BK
- ops::gated_delta_net_chunk_o::DEFAULT_BV
- ops::gated_delta_net_chunk_o::MAX_K
- ops::gated_delta_net_chunk_o::MAX_V
- ops::gated_delta_net_decode::MAX_NSG
- ops::gated_delta_net_kkt::DEFAULT_BK
- ops::gated_delta_net_kkt::MAX_K
- ops::gated_delta_net_recompute_wu::DEFAULT_BK
- ops::gated_delta_net_recompute_wu::DEFAULT_BV
- ops::gated_delta_net_recompute_wu::MAX_K
- ops::gated_delta_net_recompute_wu::MAX_V
- ops::qdq_legacy::QDQ_BLOCK_SIZE
- ops::quantized_matmul_ggml::MM_ROUTING_THRESHOLD
- ops::quantized_matmul_id_ggml::MM_ID_ROUTING_THRESHOLD
- turboquant::CODEBOOK_2BIT
- turboquant::CODEBOOK_3BIT
- turboquant::CODEBOOK_4BIT
- turboquant::CODEBOOK_HB_5BIT
- turboquant::CODEBOOK_HB_6BIT
- turboquant::CODEBOOK_HB_8BIT
- turboquant::TBQ_SIGNS_256
- turboquant::TBQ_SIGNS_512