List of all items
Structs
- CommandEncoder
- KernelRegistry
- MTLSize
- MlxBuffer
- MlxBufferPool
- MlxDevice
- gguf::GgufFile
- gguf::TensorInfo
- graph::ComputeGraph
- graph::ConflictTracker
- graph::GraphExecutor
- graph::GraphSession
- graph::MlxBuffer
- ops::copy::StridedCopyParams
- ops::dense_gemm::DenseGemmF16Params
- ops::embedding::EmbeddingGatherParams
- ops::flash_attn_vec::FlashAttnVecParams
- ops::flash_attn_vec_tq::FlashAttnVecTqParams
- ops::moe_dispatch::ExpertWeights
- ops::moe_dispatch::MoeDispatchParams
- ops::moe_gate::MoeGateParams
- ops::quantized_matmul::QuantizedMatmulParams
- ops::quantized_matmul_ggml::GgmlQuantizedMatmulParams
- ops::quantized_matmul_id::QuantizedMatmulIdParams
- ops::quantized_matmul_id_ggml::GgmlQuantizedMatmulIdParams
- ops::sdpa::SdpaParams
- ops::sdpa_sliding::SdpaSlidingParams
- turboquant::TurboQuantConfig
- weight::QuantizationConfig
- weight::QuantizedWeight
- weight::SafetensorsFile
- weight::TensorQuantConfig
Enums
- CapturedNode
- DType
- DispatchKind
- MlxError
- RecordedBinding
- gguf::MetadataValue
- graph::DType
- graph::OpKind
- ops::elementwise::CastDirection
- ops::encode_helpers::KernelArg
- ops::quantized_matmul_ggml::GgmlType
- turboquant::BitWidth
Functions
- dispatch_count
- ops::argmax::dispatch_argmax_f32
- ops::argmax::register
- ops::argsort::dispatch_argsort_desc_f32
- ops::argsort::register
- ops::copy::dispatch_copy_f32
- ops::copy::dispatch_strided_copy_f32
- ops::copy::register
- ops::dense_gemm::dispatch_dense_gemm_f16
- ops::dense_gemm::dispatch_dense_matvec_f16w_f32io
- ops::dense_gemm::register
- ops::elementwise::cast
- ops::elementwise::dispatch_cast_bf16_to_f32_with_encoder
- ops::elementwise::dispatch_cast_f32_to_bf16_with_encoder
- ops::elementwise::dispatch_scalar_mul_bf16_with_encoder
- ops::elementwise::elementwise_add
- ops::elementwise::elementwise_mul
- ops::elementwise::embedding_gather_scale_batch_f32
- ops::elementwise::embedding_gather_scale_f32
- ops::elementwise::scalar_mul_bf16
- ops::elementwise::scalar_mul_f32
- ops::embedding::embedding_gather
- ops::encode_helpers::as_bytes
- ops::encode_helpers::encode_threadgroups_with_args
- ops::encode_helpers::encode_threadgroups_with_args_and_shared
- ops::encode_helpers::encode_with_args
- ops::flash_attn_vec::flash_attn_vec
- ops::flash_attn_vec::register
- ops::flash_attn_vec::tmp_buffer_bytes
- ops::flash_attn_vec_tq::flash_attn_vec_tq
- ops::flash_attn_vec_tq::register
- ops::flash_attn_vec_tq::tmp_buffer_bytes
- ops::fused_head_norm_rope::dispatch_fused_head_norm_rope_batch_f32
- ops::fused_head_norm_rope::dispatch_fused_head_norm_rope_bf16
- ops::fused_head_norm_rope::dispatch_fused_head_norm_rope_f32
- ops::fused_head_norm_rope::register
- ops::fused_norm_add::dispatch_fused_moe_routing_batch_f32
- ops::fused_norm_add::dispatch_fused_moe_routing_f32
- ops::fused_norm_add::dispatch_fused_norm_add_bf16
- ops::fused_norm_add::dispatch_fused_norm_add_f32
- ops::fused_norm_add::dispatch_fused_norm_add_no_weight_bf16
- ops::fused_norm_add::dispatch_fused_norm_add_scalar_f32
- ops::fused_norm_add::dispatch_fused_residual_norm_f32
- ops::fused_norm_add::dispatch_fused_residual_norm_scalar_f32
- ops::fused_norm_add::register
- ops::fused_residual_norm::dispatch_fused_residual_norm_bf16
- ops::fused_residual_norm::register
- ops::fwht_standalone::dispatch_fwht_f32
- ops::gather::dispatch_gather_f32
- ops::gather::register
- ops::gather_bench::dispatch_gather_f16_seq
- ops::gather_bench::dispatch_gather_nibble
- ops::gather_bench::register
- ops::gelu::dispatch_gelu
- ops::gelu::register
- ops::hadamard::dispatch_hadamard_transform
- ops::hadamard::register
- ops::hadamard_quantize_kv::dispatch_hadamard_quantize_kv
- ops::hadamard_quantize_kv::register
- ops::kv_cache_copy::dispatch_kv_cache_copy
- ops::kv_cache_copy::dispatch_kv_cache_copy_batch_f32
- ops::kv_cache_copy::dispatch_kv_cache_copy_batch_f32_to_f16
- ops::kv_cache_copy::dispatch_kv_cache_copy_f32
- ops::kv_cache_copy::dispatch_kv_cache_copy_seq_f32
- ops::kv_cache_copy::dispatch_kv_cache_copy_seq_f32_to_f16
- ops::kv_cache_copy::register
- ops::moe_dispatch::moe_accumulate_encode
- ops::moe_dispatch::moe_accumulate_encode_offset
- ops::moe_dispatch::moe_dispatch
- ops::moe_dispatch::moe_gather_topk_weights_encode
- ops::moe_dispatch::moe_swiglu_batch_encode
- ops::moe_dispatch::moe_swiglu_fused_encode
- ops::moe_dispatch::moe_swiglu_fused_encode_offset
- ops::moe_dispatch::moe_swiglu_seq_encode
- ops::moe_dispatch::moe_weighted_sum_encode
- ops::moe_dispatch::moe_weighted_sum_seq_encode
- ops::moe_dispatch::moe_zero_buffer_encode
- ops::moe_gate::moe_gate
- ops::quantized_matmul::dispatch_quantized_matmul_simd_bf16
- ops::quantized_matmul::dispatch_quantized_matmul_simd_bf16_expert
- ops::quantized_matmul::quantized_matmul
- ops::quantized_matmul::quantized_matmul_simd
- ops::quantized_matmul_ggml::quantized_matmul_ggml
- ops::quantized_matmul_id::quantized_matmul_id
- ops::quantized_matmul_id_ggml::quantized_matmul_id_ggml
- ops::rms_norm::dispatch_rms_norm
- ops::rms_norm::dispatch_rms_norm_mul
- ops::rms_norm::dispatch_rms_norm_no_scale_bf16
- ops::rms_norm::dispatch_rms_norm_no_scale_f32
- ops::rms_norm::register
- ops::rope::dispatch_rope
- ops::rope::dispatch_rope_neox_bf16
- ops::rope::dispatch_rope_neox_f32
- ops::rope::register
- ops::sdpa::register
- ops::sdpa::sdpa
- ops::sdpa_sliding::register
- ops::sdpa_sliding::sdpa_sliding
- ops::softcap::dispatch_softcap
- ops::softcap::register
- ops::softmax::dispatch_softmax
- ops::softmax::register
- ops::softmax_sample::dispatch_softmax_sample_f32
- ops::softmax_sample::register
- ops::top_k::dispatch_top_k_f32
- ops::top_k::register
- ops::transpose::permute_021_bf16
- ops::transpose::permute_021_f32
- ops::transpose::transpose_2d
- reset_counters
- sync_count
- turboquant::compute_lloyd_max_beta_codebook
- turboquant::compute_lloyd_max_codebook
- turboquant::fwht_inplace
- turboquant::turboquant_dequantize
- turboquant::turboquant_quantize
- weight::load_quantized_weights
- weight::safetensors_to_metal_buffer
Type Aliases
Statics
- ops::argmax::ARGMAX_SHADER_SOURCE
- ops::argsort::ARGSORT_SHADER_SOURCE
- ops::copy::COPY_SHADER_SOURCE
- ops::dense_gemm::DENSE_GEMM_SHADER_SOURCE
- ops::flash_attn_vec::FLASH_ATTN_VEC_SHADER_SOURCE
- ops::flash_attn_vec_tq::FLASH_ATTN_VEC_TQ_SHADER_SOURCE
- ops::fused_head_norm_rope::FUSED_HEAD_NORM_ROPE_F32_SHADER_SOURCE
- ops::fused_head_norm_rope::FUSED_HEAD_NORM_ROPE_SHADER_SOURCE
- ops::fused_norm_add::FUSED_NORM_ADD_SHADER_SOURCE
- ops::fused_residual_norm::FUSED_RESIDUAL_NORM_SHADER_SOURCE
- ops::fwht_standalone::FWHT_STANDALONE_SHADER_SOURCE
- ops::gather::GATHER_SHADER_SOURCE
- ops::gather_bench::GATHER_BENCH_SHADER_SOURCE
- ops::gelu::GELU_SHADER_SOURCE
- ops::hadamard::HADAMARD_SHADER_SOURCE
- ops::hadamard_quantize_kv::HADAMARD_QUANTIZE_KV_SHADER_SOURCE
- ops::kv_cache_copy::KV_CACHE_COPY_SHADER_SOURCE
- ops::rms_norm::RMS_NORM_SHADER_SOURCE
- ops::rope::ROPE_SHADER_SOURCE
- ops::sdpa::SDPA_SHADER_SOURCE
- ops::sdpa_sliding::SDPA_SLIDING_SHADER_SOURCE
- ops::softcap::SOFTCAP_SHADER_SOURCE
- ops::softmax::SOFTMAX_SHADER_SOURCE
- ops::softmax_sample::SOFTMAX_SAMPLE_SHADER_SOURCE
- ops::top_k::TOP_K_SHADER_SOURCE