# Crate identity and publishing metadata.
[package]
name = "metaltile-std"
version = "0.1.0"
edition = "2024"
build = "build.rs"
license = "Apache-2.0"
publish = true
readme = "README.md"
# NOTE(review): edition 2024 defaults to resolver "3"; "2" is set explicitly
# here — confirm that this is intentional.
resolver = "2"
# Target auto-discovery is switched off for every target kind, so Cargo only
# builds the targets that this manifest declares explicitly.
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "MetalTile kernel standard library — benchmark metadata and type definitions"
# The library target, declared explicitly (its name uses underscores, unlike
# the hyphenated package name).
[lib]
path = "src/lib.rs"
name = "metaltile_std"
# Integration test targets, listed alphabetically by name. `autotests = false`
# in [package] turns off Cargo's discovery of files under tests/, so a test
# file is only built and run if it has an entry here. Each entry's `name`
# equals the file stem of its `path`.
[[test]]
name = "affine_int2_gpu_correctness"
path = "tests/affine_int2_gpu_correctness.rs"
[[test]]
name = "affine_int356_quantize_gpu_correctness"
path = "tests/affine_int356_quantize_gpu_correctness.rs"
[[test]]
name = "arange_gpu_correctness"
path = "tests/arange_gpu_correctness.rs"
[[test]]
name = "arg_reduce_gpu_correctness"
path = "tests/arg_reduce_gpu_correctness.rs"
[[test]]
name = "argmax_gpu_correctness"
path = "tests/argmax_gpu_correctness.rs"
[[test]]
name = "audio_conv1d_gpu_correctness"
path = "tests/audio_conv1d_gpu_correctness.rs"
[[test]]
name = "aura_dequant_rotated_gpu_correctness"
path = "tests/aura_dequant_rotated_gpu_correctness.rs"
[[test]]
name = "aura_encode_gpu_correctness"
path = "tests/aura_encode_gpu_correctness.rs"
[[test]]
name = "aura_flash_gpu_correctness"
path = "tests/aura_flash_gpu_correctness.rs"
[[test]]
name = "aura_flash_p1_causal_gpu_correctness"
path = "tests/aura_flash_p1_causal_gpu_correctness.rs"
[[test]]
name = "aura_flash_sdpa_gpu_correctness"
path = "tests/aura_flash_sdpa_gpu_correctness.rs"
[[test]]
name = "aura_msl_snapshots"
path = "tests/aura_msl_snapshots.rs"
[[test]]
name = "aura_score_gpu_correctness"
path = "tests/aura_score_gpu_correctness.rs"
[[test]]
name = "aura_value_gpu_correctness"
path = "tests/aura_value_gpu_correctness.rs"
[[test]]
name = "batched_qkv_qgemv_gpu_correctness"
path = "tests/batched_qkv_qgemv_gpu_correctness.rs"
[[test]]
name = "binary_gpu_correctness"
path = "tests/binary_gpu_correctness.rs"
[[test]]
name = "binary_two_gpu_correctness"
path = "tests/binary_two_gpu_correctness.rs"
[[test]]
name = "cast_to_f32_gpu_correctness"
path = "tests/cast_to_f32_gpu_correctness.rs"
[[test]]
name = "conv1d_causal_step_gpu_correctness"
path = "tests/conv1d_causal_step_gpu_correctness.rs"
[[test]]
name = "conv2d_gpu_correctness"
path = "tests/conv2d_gpu_correctness.rs"
[[test]]
name = "conv2d_mma_gpu_correctness"
path = "tests/conv2d_mma_gpu_correctness.rs"
[[test]]
name = "conv3d_gpu_correctness"
path = "tests/conv3d_gpu_correctness.rs"
[[test]]
name = "conv3d_mma_gpu_correctness"
path = "tests/conv3d_mma_gpu_correctness.rs"
[[test]]
name = "copy_gpu_correctness"
path = "tests/copy_gpu_correctness.rs"
[[test]]
name = "dequant_gather_gpu_correctness"
path = "tests/dequant_gather_gpu_correctness.rs"
[[test]]
name = "dequant_gemv_gpu_correctness"
path = "tests/dequant_gemv_gpu_correctness.rs"
[[test]]
name = "fft_bluestein_gpu_correctness"
path = "tests/fft_bluestein_gpu_correctness.rs"
[[test]]
name = "fft_gpu_correctness"
path = "tests/fft_gpu_correctness.rs"
[[test]]
name = "flash_quantized_sdpa_gpu_correctness"
path = "tests/flash_quantized_sdpa_gpu_correctness.rs"
[[test]]
name = "flash_quantized_sdpa_mask_gpu_correctness"
path = "tests/flash_quantized_sdpa_mask_gpu_correctness.rs"
[[test]]
name = "fp_quantized_fp4_gpu_correctness"
path = "tests/fp_quantized_fp4_gpu_correctness.rs"
[[test]]
name = "fp_quantized_fp8_gpu_correctness"
path = "tests/fp_quantized_fp8_gpu_correctness.rs"
[[test]]
name = "fp_quantized_mma_gpu_correctness"
path = "tests/fp_quantized_mma_gpu_correctness.rs"
[[test]]
name = "fp_quantized_nax_gpu_correctness"
path = "tests/fp_quantized_nax_gpu_correctness.rs"
[[test]]
name = "fused_gate_activation_gpu_correctness"
path = "tests/fused_gate_activation_gpu_correctness.rs"
[[test]]
name = "gated_delta_gpu_correctness"
path = "tests/gated_delta_gpu_correctness.rs"
[[test]]
name = "gated_delta_prep_chunk_correctness"
path = "tests/gated_delta_prep_chunk_correctness.rs"
[[test]]
name = "gated_delta_prep_step_correctness"
path = "tests/gated_delta_prep_step_correctness.rs"
[[test]]
name = "gated_delta_replay_gpu_correctness"
path = "tests/gated_delta_replay_gpu_correctness.rs"
[[test]]
name = "gated_delta_wy_cpu_oracle"
path = "tests/gated_delta_wy_cpu_oracle.rs"
[[test]]
name = "gated_delta_wy_gpu_correctness"
path = "tests/gated_delta_wy_gpu_correctness.rs"
[[test]]
name = "gated_delta_wy_microbench"
path = "tests/gated_delta_wy_microbench.rs"
[[test]]
name = "gated_mixer_norm_gpu_correctness"
path = "tests/gated_mixer_norm_gpu_correctness.rs"
[[test]]
name = "gated_rmsnorm_gpu_correctness"
path = "tests/gated_rmsnorm_gpu_correctness.rs"
[[test]]
name = "gather_axis_gpu_correctness"
path = "tests/gather_axis_gpu_correctness.rs"
[[test]]
name = "gather_gpu_correctness"
path = "tests/gather_gpu_correctness.rs"
[[test]]
name = "gemm_gpu_correctness"
path = "tests/gemm_gpu_correctness.rs"
[[test]]
name = "gemv_gpu_correctness"
path = "tests/gemv_gpu_correctness.rs"
[[test]]
name = "gemv_masked_gpu_correctness"
path = "tests/gemv_masked_gpu_correctness.rs"
[[test]]
name = "hadamard_gpu_correctness"
path = "tests/hadamard_gpu_correctness.rs"
[[test]]
name = "hadamard_m_gpu_correctness"
path = "tests/hadamard_m_gpu_correctness.rs"
[[test]]
name = "indexing_gpu_correctness"
path = "tests/indexing_gpu_correctness.rs"
[[test]]
name = "kernel_registry_consistency"
path = "tests/kernel_registry_consistency.rs"
[[test]]
name = "kv_cache_fp8_gpu_correctness"
path = "tests/kv_cache_fp8_gpu_correctness.rs"
[[test]]
name = "kv_cache_quant_roundtrip_gpu"
path = "tests/kv_cache_quant_roundtrip_gpu.rs"
[[test]]
name = "kv_cache_update_gpu_correctness"
path = "tests/kv_cache_update_gpu_correctness.rs"
[[test]]
name = "layer_norm_gpu_correctness"
path = "tests/layer_norm_gpu_correctness.rs"
[[test]]
name = "logits_min_p_mask_gpu_correctness"
path = "tests/logits_min_p_mask_gpu_correctness.rs"
[[test]]
name = "logits_processors_gpu_correctness"
path = "tests/logits_processors_gpu_correctness.rs"
[[test]]
name = "logits_top_p_mask_gpu_correctness"
path = "tests/logits_top_p_mask_gpu_correctness.rs"
[[test]]
name = "logits_topk_mask_gpu_correctness"
path = "tests/logits_topk_mask_gpu_correctness.rs"
[[test]]
name = "logsumexp_gpu_correctness"
path = "tests/logsumexp_gpu_correctness.rs"
[[test]]
name = "mel_spectrogram_gpu_correctness"
path = "tests/mel_spectrogram_gpu_correctness.rs"
[[test]]
name = "mma_layout_probe"
path = "tests/mma_layout_probe.rs"
[[test]]
name = "moe_gather_qmm_gpu_correctness"
path = "tests/moe_gather_qmm_gpu_correctness.rs"
[[test]]
name = "moe_gather_qmm_int4_m16_m32_correctness"
path = "tests/moe_gather_qmm_int4_m16_m32_correctness.rs"
[[test]]
name = "moe_gather_qmm_microbench"
path = "tests/moe_gather_qmm_microbench.rs"
[[test]]
name = "moe_gather_qmm_mma_bitwidth_correctness"
path = "tests/moe_gather_qmm_mma_bitwidth_correctness.rs"
[[test]]
name = "moe_gather_qmm_mma_int8_gpu_correctness"
path = "tests/moe_gather_qmm_mma_int8_gpu_correctness.rs"
[[test]]
name = "moe_gather_qmm_mpp_bm64_correctness"
path = "tests/moe_gather_qmm_mpp_bm64_correctness.rs"
[[test]]
name = "moe_gather_qmm_mpp_bm64_int8_correctness"
path = "tests/moe_gather_qmm_mpp_bm64_int8_correctness.rs"
[[test]]
name = "moe_gather_qmm_mpp_bm8_correctness"
path = "tests/moe_gather_qmm_mpp_bm8_correctness.rs"
[[test]]
name = "moe_gather_qmm_mpp_bm8_int8_correctness"
path = "tests/moe_gather_qmm_mpp_bm8_int8_correctness.rs"
[[test]]
name = "moe_gather_qmm_mpp_correctness"
path = "tests/moe_gather_qmm_mpp_correctness.rs"
[[test]]
name = "moe_gather_qmm_mpp_int8_correctness"
path = "tests/moe_gather_qmm_mpp_int8_correctness.rs"
[[test]]
name = "moe_gpu_correctness"
path = "tests/moe_gpu_correctness.rs"
[[test]]
name = "mpp_matmul_smoke"
path = "tests/mpp_matmul_smoke.rs"
[[test]]
name = "mt_arg_reduce_gpu_correctness"
path = "tests/mt_arg_reduce_gpu_correctness.rs"
[[test]]
name = "mt_qmv_gpu_correctness"
path = "tests/mt_qmv_gpu_correctness.rs"
[[test]]
name = "mt_remainder_gpu_correctness"
path = "tests/mt_remainder_gpu_correctness.rs"
[[test]]
name = "mt_strided_copy_gpu_correctness"
path = "tests/mt_strided_copy_gpu_correctness.rs"
[[test]]
name = "patch_embed_gpu_correctness"
path = "tests/patch_embed_gpu_correctness.rs"
[[test]]
name = "patch_embed_mma_gpu_correctness"
path = "tests/patch_embed_mma_gpu_correctness.rs"
[[test]]
name = "qmm_gpu_correctness"
path = "tests/qmm_gpu_correctness.rs"
[[test]]
name = "qmm_int8_fast_gpu_correctness"
path = "tests/qmm_int8_fast_gpu_correctness.rs"
[[test]]
name = "qmm_mma_b356_gpu_correctness"
path = "tests/qmm_mma_b356_gpu_correctness.rs"
[[test]]
name = "qmm_mma_dynamic_m_correctness"
path = "tests/qmm_mma_dynamic_m_correctness.rs"
[[test]]
name = "qmm_mma_int8_gpu_correctness"
path = "tests/qmm_mma_int8_gpu_correctness.rs"
[[test]]
name = "qmm_mpp_correctness"
path = "tests/qmm_mpp_correctness.rs"
[[test]]
name = "qmm_mpp_int8_correctness"
path = "tests/qmm_mpp_int8_correctness.rs"
[[test]]
name = "qmv_int8_fast_gpu_correctness"
path = "tests/qmv_int8_fast_gpu_correctness.rs"
[[test]]
name = "quantized_family_gpu_correctness"
path = "tests/quantized_family_gpu_correctness.rs"
[[test]]
name = "quantized_nax_gpu_correctness"
path = "tests/quantized_nax_gpu_correctness.rs"
[[test]]
name = "quantized_nax_int8_gpu_correctness"
path = "tests/quantized_nax_int8_gpu_correctness.rs"
[[test]]
name = "qvm_int4_fast_gpu_correctness"
path = "tests/qvm_int4_fast_gpu_correctness.rs"
[[test]]
name = "random_gpu_correctness"
path = "tests/random_gpu_correctness.rs"
[[test]]
name = "reduce_col_seg_gpu_correctness"
path = "tests/reduce_col_seg_gpu_correctness.rs"
[[test]]
name = "rms_norm_gpu_correctness"
path = "tests/rms_norm_gpu_correctness.rs"
[[test]]
name = "rms_norm_per_head_gpu"
path = "tests/rms_norm_per_head_gpu.rs"
[[test]]
name = "rms_norm_qgemv_gpu_correctness"
path = "tests/rms_norm_qgemv_gpu_correctness.rs"
[[test]]
name = "rms_norm_qgemv_int8_fast_gpu_correctness"
path = "tests/rms_norm_qgemv_int8_fast_gpu_correctness.rs"
[[test]]
name = "rms_norm_residual_gpu_correctness"
path = "tests/rms_norm_residual_gpu_correctness.rs"
[[test]]
name = "rms_norm_rope_gpu_correctness"
path = "tests/rms_norm_rope_gpu_correctness.rs"
[[test]]
name = "rms_norm_wide_gpu_correctness"
path = "tests/rms_norm_wide_gpu_correctness.rs"
[[test]]
name = "rope_2d_gpu_correctness"
path = "tests/rope_2d_gpu_correctness.rs"
[[test]]
name = "rope_gpu_correctness"
path = "tests/rope_gpu_correctness.rs"
[[test]]
name = "rope_llama_gpu_correctness"
path = "tests/rope_llama_gpu_correctness.rs"
[[test]]
name = "rope_yarn_gpu_correctness"
path = "tests/rope_yarn_gpu_correctness.rs"
[[test]]
name = "scan_exclusive_gpu_correctness"
path = "tests/scan_exclusive_gpu_correctness.rs"
[[test]]
name = "scan_multi_op_gpu_correctness"
path = "tests/scan_multi_op_gpu_correctness.rs"
[[test]]
name = "scatter_axis_gpu_correctness"
path = "tests/scatter_axis_gpu_correctness.rs"
[[test]]
name = "sdpa_bidirectional_gpu_correctness"
path = "tests/sdpa_bidirectional_gpu_correctness.rs"
[[test]]
name = "sdpa_decode_2pass_gpu"
path = "tests/sdpa_decode_2pass_gpu.rs"
[[test]]
name = "sdpa_decode_batched_gpu_correctness"
path = "tests/sdpa_decode_batched_gpu_correctness.rs"
[[test]]
name = "sdpa_decode_batched_prefill_gpu_correctness"
path = "tests/sdpa_decode_batched_prefill_gpu_correctness.rs"
[[test]]
name = "sdpa_decode_batched_q8_gpu_correctness"
path = "tests/sdpa_decode_batched_q8_gpu_correctness.rs"
[[test]]
name = "sdpa_decode_d256_gpu_correctness"
path = "tests/sdpa_decode_d256_gpu_correctness.rs"
[[test]]
name = "sdpa_decode_d512_gpu_correctness"
path = "tests/sdpa_decode_d512_gpu_correctness.rs"
[[test]]
name = "sdpa_decode_d64_gpu_correctness"
path = "tests/sdpa_decode_d64_gpu_correctness.rs"
[[test]]
name = "sdpa_decode_gpu_correctness"
path = "tests/sdpa_decode_gpu_correctness.rs"
[[test]]
name = "sdpa_decode_swa_gpu"
path = "tests/sdpa_decode_swa_gpu.rs"
[[test]]
name = "sdpa_multi_gpu_correctness"
path = "tests/sdpa_multi_gpu_correctness.rs"
[[test]]
name = "sdpa_prefill_mma_bf16_gpu"
path = "tests/sdpa_prefill_mma_bf16_gpu.rs"
[[test]]
name = "sdpa_prefill_mma_long_t"
path = "tests/sdpa_prefill_mma_long_t.rs"
[[test]]
name = "sdpa_vector_gpu_correctness"
path = "tests/sdpa_vector_gpu_correctness.rs"
[[test]]
name = "sgload_smoke_gpu"
path = "tests/sgload_smoke_gpu.rs"
[[test]]
name = "sigmoid_scalar_fma_gpu_correctness"
path = "tests/sigmoid_scalar_fma_gpu_correctness.rs"
[[test]]
name = "softmax_categorical_sample_gpu_correctness"
path = "tests/softmax_categorical_sample_gpu_correctness.rs"
[[test]]
name = "softmax_categorical_sample_matrix"
path = "tests/softmax_categorical_sample_matrix.rs"
[[test]]
name = "softmax_categorical_sample_perf"
path = "tests/softmax_categorical_sample_perf.rs"
[[test]]
name = "softmax_gpu_correctness"
path = "tests/softmax_gpu_correctness.rs"
[[test]]
name = "sort_gpu_correctness"
path = "tests/sort_gpu_correctness.rs"
[[test]]
name = "sort_segmented_gpu_correctness"
path = "tests/sort_segmented_gpu_correctness.rs"
[[test]]
name = "ssm_gpu_correctness"
path = "tests/ssm_gpu_correctness.rs"
[[test]]
name = "ssm_replay_gpu_correctness"
path = "tests/ssm_replay_gpu_correctness.rs"
[[test]]
name = "ssm_step_a2d_gpu_correctness"
path = "tests/ssm_step_a2d_gpu_correctness.rs"
[[test]]
name = "ssm_step_gpu_correctness"
path = "tests/ssm_step_gpu_correctness.rs"
[[test]]
name = "steel_attention_gpu_correctness"
path = "tests/steel_attention_gpu_correctness.rs"
[[test]]
name = "steel_attention_nax_gpu_correctness"
path = "tests/steel_attention_nax_gpu_correctness.rs"
[[test]]
name = "steel_gemm_fused_nax_gpu_correctness"
path = "tests/steel_gemm_fused_nax_gpu_correctness.rs"
[[test]]
name = "steel_gemm_gather_gpu_correctness"
path = "tests/steel_gemm_gather_gpu_correctness.rs"
[[test]]
name = "steel_gemm_gather_nax_gpu_correctness"
path = "tests/steel_gemm_gather_nax_gpu_correctness.rs"
[[test]]
name = "steel_gemm_gpu_correctness"
path = "tests/steel_gemm_gpu_correctness.rs"
[[test]]
name = "steel_gemm_masked_gpu_correctness"
path = "tests/steel_gemm_masked_gpu_correctness.rs"
[[test]]
name = "steel_gemm_segmented_gpu_correctness"
path = "tests/steel_gemm_segmented_gpu_correctness.rs"
[[test]]
name = "steel_gemm_splitk_gpu_correctness"
path = "tests/steel_gemm_splitk_gpu_correctness.rs"
[[test]]
name = "steel_gemm_splitk_nax_gpu_correctness"
path = "tests/steel_gemm_splitk_nax_gpu_correctness.rs"
[[test]]
name = "steel_msl_snapshots"
path = "tests/steel_msl_snapshots.rs"
[[test]]
name = "swiglu_gpu_correctness"
path = "tests/swiglu_gpu_correctness.rs"
[[test]]
name = "ternary_gpu_correctness"
path = "tests/ternary_gpu_correctness.rs"
[[test]]
name = "unary_gpu_correctness"
path = "tests/unary_gpu_correctness.rs"
[[test]]
name = "vector_add_gpu_correctness"
path = "tests/vector_add_gpu_correctness.rs"
[[test]]
name = "vocoder_gpu_correctness"
path = "tests/vocoder_gpu_correctness.rs"
[[test]]
name = "winograd_conv_gpu_correctness"
path = "tests/winograd_conv_gpu_correctness.rs"
# Platform-independent dependencies, sorted alphabetically. Each one needs
# only a version requirement, so the short `name = "req"` form is used.
[dependencies]
bytemuck = "1"
half = "2"
inventory = "0.3"
metaltile = "0.1.0"
metaltile-codegen = "0.1.0"
metaltile-core = "0.1.0"
metaltile-runtime = "0.1.0"
rustc-hash = "2"
thiserror = "2"
# Dependencies used only when building tests, examples, and benchmarks.
# `insta` is pulled in with its default feature set disabled.
[dev-dependencies]
insta = { version = "1.43", default-features = false }
# Dependencies that are only resolved when compiling for macOS.
[target.'cfg(target_os = "macos")'.dependencies]
objc2 = "0.6"
objc2-foundation = "0.3"

# `objc2-metal` gets its own table because it also selects Cargo features;
# the list below is the set of features this crate enables.
[target.'cfg(target_os = "macos")'.dependencies.objc2-metal]
version = "0.3"
features = [
    "MTLDevice",
    "MTLCommandQueue",
    "MTLLibrary",
    "MTLComputePipeline",
    "MTLComputeCommandEncoder",
    "MTLBuffer",
    "MTLCommandBuffer",
    "MTLCommandEncoder",
    "MTLFunctionConstantValues",
    "MTLDataType",
]