#![cfg(feature = "gpu-pixels")]
use super::*;
#[test]
fn ptx_pixel_fkr_gemm_tiled_no_bugs() {
    // Emit the tiled GEMM kernel's PTX and check it for the two classic
    // shared-memory pitfalls: 64-bit shared addressing and missing barriers.
    let validation = validate_ptx(&GemmKernel::tiled(32, 32, 128, 32).emit_ptx());

    // Shared memory is a 32-bit address space; u64 addressing is a bug class.
    assert!(
        !validation.has_bug(&PtxBugClass::SharedMemU64Addressing),
        "GEMM tiled kernel uses u64 for shared memory (should be u32)"
    );
    // Tiled GEMM stages tiles through shared memory, so it must synchronize.
    assert!(
        !validation.has_bug(&PtxBugClass::MissingBarrierSync),
        "GEMM tiled kernel missing barrier synchronization"
    );
    println!("ptx_pixel_fkr_gemm_tiled: PASS (no shared memory bugs)");
}
#[test]
fn ptx_pixel_fkr_gemm_tensor_core() {
    // The tensor-core GEMM variant must also keep shared-memory
    // addressing in 32-bit registers.
    let ptx_text = GemmKernel::tensor_core(32, 32, 64).emit_ptx();
    let report = validate_ptx(&ptx_text);
    assert!(
        !report.has_bug(&PtxBugClass::SharedMemU64Addressing),
        "Tensor core GEMM uses u64 for shared memory"
    );
    println!("ptx_pixel_fkr_gemm_tensor_core: PASS");
}
#[test]
fn ptx_pixel_fkr_attention() {
    // Attention kernel PTX: no 64-bit shared-memory addressing, and the
    // emitted text must contain at least one explicit barrier instruction.
    let emitted = AttentionKernel::new(64, 64).emit_ptx();
    let report = validate_ptx(&emitted);

    assert!(
        !report.has_bug(&PtxBugClass::SharedMemU64Addressing),
        "Attention kernel uses u64 for shared memory"
    );
    // Check the raw PTX directly for the barrier opcode.
    assert!(
        emitted.contains("bar.sync"),
        "Attention kernel must have barrier synchronization"
    );
    println!("ptx_pixel_fkr_attention: PASS");
}
#[test]
fn ptx_pixel_fkr_attention_causal() {
    // The causal attention variant should be identifiable from its emitted
    // PTX (e.g. a "flash_attention_causal" kernel name).
    let kernel = AttentionKernel::new(64, 64).with_causal();
    let ptx = kernel.emit_ptx();
    // NOTE(review): the original condition was
    //   ptx.contains("flash_attention_causal") || ptx.contains("causal")
    // but the first clause is subsumed by the second — any string containing
    // "flash_attention_causal" also contains "causal" — so the single check
    // below is logically identical.
    assert!(
        ptx.contains("causal"),
        "Causal attention should have _causal suffix"
    );
    println!("ptx_pixel_fkr_attention_causal: PASS");
}
#[test]
fn ptx_pixel_fkr_softmax_entry() {
    // Softmax PTX must declare an entry point (.entry directive).
    let report = validate_ptx(&SoftmaxKernel::new(128).emit_ptx());
    assert!(
        !report.has_bug(&PtxBugClass::MissingEntryPoint),
        "Softmax kernel must have entry point"
    );
    println!("ptx_pixel_fkr_softmax: PASS");
}
#[test]
fn ptx_pixel_fkr_layernorm_entry() {
    // LayerNorm PTX must declare an entry point (.entry directive).
    let emitted = LayerNormKernel::new(256).emit_ptx();
    let report = validate_ptx(&emitted);
    assert!(
        !report.has_bug(&PtxBugClass::MissingEntryPoint),
        "LayerNorm kernel must have entry point"
    );
    println!("ptx_pixel_fkr_layernorm: PASS");
}
#[test]
fn ptx_pixel_fkr_bias_activation_entry() {
    // Every activation variant of the bias kernel must emit a valid entry point.
    let variants = [Activation::None, Activation::ReLU, Activation::GELU];
    for activation in variants {
        let ptx = BiasActivationKernel::new(1024, 64)
            .with_activation(activation)
            .emit_ptx();
        let report = validate_ptx(&ptx);
        assert!(
            !report.has_bug(&PtxBugClass::MissingEntryPoint),
            "BiasActivation kernel ({:?}) must have entry point",
            activation
        );
    }
    println!("ptx_pixel_fkr_bias_activation: PASS (all variants)");
}
#[test]
fn ptx_pixel_fkr_bias_activation_gelu_approx() {
    // GELU should be computed with the hardware ex2 (base-2 exponential)
    // instruction rather than a slow software exp.
    let emitted = BiasActivationKernel::new(1024, 64).with_gelu().emit_ptx();
    let has_ex2 = emitted.contains("ex2.approx") || emitted.contains("ex2.f32");
    assert!(
        has_ex2,
        "GELU should use ex2 for fast exp approximation"
    );
    println!("ptx_pixel_fkr_bias_activation_gelu: PASS (uses ex2 approximation)");
}
#[test]
fn ptx_pixel_fkr_bias_activation_relu_max() {
    // ReLU(x) = max(x, 0) — the emitted PTX should use the max.f32 opcode.
    let emitted = BiasActivationKernel::new(1024, 64).with_relu().emit_ptx();
    assert!(
        emitted.contains("max.f32"),
        "ReLU should use max.f32 instruction"
    );
    println!("ptx_pixel_fkr_bias_activation_relu: PASS (uses max.f32)");
}