/// Evaluates one MoE expert's SwiGLU feed-forward pass, delegating the three quantized
/// matrix-vector products to the CUDA executor.
///
/// Steps, in order:
/// 1. gate matvec and up matvec over `hidden`, each yielding `intermediate` values;
/// 2. elementwise `inner[i] = silu(gate[i]) * up[i]` with `silu(g) = g / (1 + e^-g)`,
///    evaluated by an ordinary Rust iterator over the two result vectors;
/// 3. down matvec over `inner`, yielding `hidden_dim` values.
///
/// # Parameters
/// * `executor` - CUDA executor that runs the matvec kernels.
/// * `gate_bytes` / `up_bytes` / `down_bytes` - weight bytes for each projection, passed
///   through to the kernel untouched. Their lengths are not checked in this function.
/// * `gate_qtype` / `up_qtype` / `down_qtype` - GGUF quantization type id of the matching
///   weight buffer; each projection is dispatched independently on its own id.
/// * `hidden` - input activations; must contain exactly `hidden_dim` values.
/// * `hidden_dim`, `intermediate` - model and expert-inner dimensions; both must be non-zero.
///
/// # Returns
/// A vector of `hidden_dim` values holding the down-projection result.
///
/// # Errors
/// * [`RealizarError::InvalidShape`] if `hidden.len() != hidden_dim`, or if either dimension
///   is zero (the length mismatch is reported first).
/// * Any error returned by `matvec_qtype_cuda` for one of the three projections is
///   propagated unchanged.
#[cfg(feature = "cuda")]
#[allow(clippy::too_many_arguments)]
pub(crate) fn expert_swiglu_cuda(
    executor: &mut crate::cuda::CudaExecutor,
    gate_bytes: &[u8],
    gate_qtype: u32,
    up_bytes: &[u8],
    up_qtype: u32,
    down_bytes: &[u8],
    down_qtype: u32,
    hidden: &[f32],
    hidden_dim: usize,
    intermediate: usize,
) -> Result<Vec<f32>> {
    // Shape guards. The length mismatch is checked before the zero-dimension case.
    let actual_len = hidden.len();
    if actual_len != hidden_dim {
        let reason = format!(
            "expert_swiglu_cuda: hidden.len() = {} but hidden_dim = {}",
            actual_len, hidden_dim
        );
        return Err(RealizarError::InvalidShape { reason });
    }
    if hidden_dim == 0 || intermediate == 0 {
        let reason = format!(
            "expert_swiglu_cuda: hidden_dim ({hidden_dim}) and intermediate \
             ({intermediate}) must both be > 0"
        );
        return Err(RealizarError::InvalidShape { reason });
    }

    // One projection = allocate a zeroed output of `out_dim` values, then run the
    // qtype-dispatched matvec into it. The output is allocated immediately before its kernel.
    let mut project = |qtype: u32,
                       weights: &[u8],
                       input: &[f32],
                       out_dim: usize,
                       in_dim: usize,
                       role: &'static str|
     -> Result<Vec<f32>> {
        let mut output = vec![0.0f32; out_dim];
        matvec_qtype_cuda(
            &mut *executor,
            qtype,
            weights,
            input,
            &mut output,
            out_dim,
            in_dim,
            role,
        )?;
        Ok(output)
    };

    let gate_proj = project(
        gate_qtype,
        gate_bytes,
        hidden,
        intermediate,
        hidden_dim,
        "gate",
    )?;
    let up_proj = project(up_qtype, up_bytes, hidden, intermediate, hidden_dim, "up")?;

    // SwiGLU combine: silu(gate) * up, elementwise. Both inputs hold `intermediate` values,
    // so the zipped result does too.
    let activated: Vec<f32> = gate_proj
        .iter()
        .zip(up_proj.iter())
        .map(|(&g, &u)| {
            let silu_g = g / (1.0 + (-g).exp());
            silu_g * u
        })
        .collect();

    // The down projection maps the `intermediate` values back to `hidden_dim` values.
    project(
        down_qtype,
        down_bytes,
        activated.as_slice(),
        hidden_dim,
        intermediate,
        "down",
    )
}
/// Runs one quantized matrix-vector product on the CUDA executor, choosing the kernel from
/// the GGUF quantization type id of the weight buffer.
///
/// * `GGUF_TYPE_Q4_K` is routed to `executor.q4k_matvec`.
/// * `GGUF_TYPE_Q6_K` is routed to `executor.q6k_gemv`.
/// * Every other id is rejected.
///
/// # Parameters
/// * `executor` - CUDA executor that owns the kernels.
/// * `qtype` - GGUF quantization type id of `bytes`.
/// * `bytes` - weight bytes, forwarded to the kernel as-is.
/// * `activations` - input vector forwarded to the kernel.
/// * `out` - output buffer the kernel writes into.
/// * `out_dim`, `in_dim` - dimensions forwarded to the kernel as `u32`.
/// * `role` - short label (`"gate"`, `"up"`, `"down"` at the visible call sites) embedded in
///   error messages so a failure can be attributed to a projection.
///
/// # Errors
/// * [`RealizarError::UnsupportedOperation`] if `qtype` is neither Q4_K nor Q6_K, or if the
///   selected kernel reports an error (the kernel's message is carried in `reason`).
/// * [`RealizarError::InvalidShape`] if `qtype` is supported but `out_dim` or `in_dim` does
///   not fit in `u32`. Previously such values were truncated by an `as` cast and the kernel
///   was invoked with the wrapped dimensions.
#[cfg(feature = "cuda")]
#[allow(clippy::too_many_arguments)]
fn matvec_qtype_cuda(
    executor: &mut crate::cuda::CudaExecutor,
    qtype: u32,
    bytes: &[u8],
    activations: &[f32],
    out: &mut [f32],
    out_dim: usize,
    in_dim: usize,
    role: &'static str,
) -> Result<()> {
    use crate::gguf::types::{GGUF_TYPE_Q4_K, GGUF_TYPE_Q6_K};
    // Explicit import so `u32::try_from` resolves on editions that lack it in the prelude.
    use std::convert::TryFrom;

    // Checked narrowing of both dimensions. Evaluated lazily inside the supported arms so an
    // unsupported qtype keeps reporting `UnsupportedOperation`, whatever the dimensions are.
    let kernel_dims = || -> Result<(u32, u32)> {
        match (u32::try_from(out_dim), u32::try_from(in_dim)) {
            (Ok(out_dim_u32), Ok(in_dim_u32)) => Ok((out_dim_u32, in_dim_u32)),
            _ => Err(RealizarError::InvalidShape {
                reason: format!(
                    "expert_swiglu_cuda::{role}_matvec: out_dim ({out_dim}) and in_dim \
                     ({in_dim}) must both fit in u32"
                ),
            }),
        }
    };

    match qtype {
        GGUF_TYPE_Q4_K => {
            let (out_dim_u32, in_dim_u32) = kernel_dims()?;
            executor
                .q4k_matvec(bytes, activations, out, out_dim_u32, in_dim_u32)
                .map_err(|e| RealizarError::UnsupportedOperation {
                    operation: format!("expert_swiglu_cuda::{role}_q4k_matvec"),
                    reason: e.to_string(),
                })
        }
        GGUF_TYPE_Q6_K => {
            let (out_dim_u32, in_dim_u32) = kernel_dims()?;
            executor
                .q6k_gemv(bytes, activations, out, out_dim_u32, in_dim_u32)
                .map_err(|e| RealizarError::UnsupportedOperation {
                    operation: format!("expert_swiglu_cuda::{role}_q6k_gemv"),
                    reason: e.to_string(),
                })
        }
        other => Err(RealizarError::UnsupportedOperation {
            operation: format!("expert_swiglu_cuda::{role}_matvec"),
            reason: format!(
                "MoE expert tensor qtype {other} not supported. Qwen3-Coder Q4_K_M uses \
                 Q4_K (12) and Q6_K (14) — caller must extend matvec_qtype_cuda for other \
                 quantizations (mirror of CPU matvec_for_qtype in qwen3_moe_load.rs)."
            ),
        }),
    }
}
/// Tests for `expert_swiglu_cuda`.
///
/// The GPU-backed tests only compile with the `cuda` feature and return without asserting
/// anything when `CudaExecutor::new(0)` fails, so they pass on hosts where no executor can
/// be created.
#[cfg(test)]
mod expert_swiglu_cuda_tests {
    // Everything these imports provide is used only by `cuda`-gated code below, so they carry
    // the same gate; otherwise non-CUDA test builds emit unused-import warnings.
    #[cfg(feature = "cuda")]
    use super::*;
    #[cfg(feature = "cuda")]
    use crate::gguf::types::{GGUF_TYPE_Q4_K, GGUF_TYPE_Q6_K};

    /// Intentionally empty.
    /// NOTE(review): presumably kept so this test name stays present in the suite; confirm
    /// before removing.
    #[test]
    fn expert_swiglu_cuda_signature_drift_gate() {}

    /// A `hidden` slice whose length (5) differs from `hidden_dim` (10) must be rejected
    /// with `InvalidShape`.
    #[cfg(feature = "cuda")]
    #[test]
    fn expert_swiglu_cuda_rejects_mismatched_hidden_len() {
        if let Ok(mut executor) = crate::cuda::CudaExecutor::new(0) {
            let dummy_bytes = vec![0u8; 144];
            let hidden = vec![1.0f32; 5];
            let result = expert_swiglu_cuda(
                &mut executor,
                &dummy_bytes,
                GGUF_TYPE_Q4_K,
                &dummy_bytes,
                GGUF_TYPE_Q4_K,
                &dummy_bytes,
                GGUF_TYPE_Q6_K,
                &hidden,
                10,
                4,
            );
            assert!(
                matches!(result, Err(RealizarError::InvalidShape { .. })),
                "expected InvalidShape for mismatched hidden length, got {result:?}"
            );
        }
    }

    /// A gate qtype that the dispatcher does not handle (Q8_0) must surface as
    /// `UnsupportedOperation`.
    #[cfg(feature = "cuda")]
    #[test]
    fn falsify_qw3_moe_gpu_qtype_aware_dispatch_rejects_unknown() {
        if let Ok(mut executor) = crate::cuda::CudaExecutor::new(0) {
            let dummy_bytes = vec![0u8; 1024];
            let hidden = vec![1.0f32; 4];
            // Declared locally because this module only imports the Q4_K and Q6_K ids.
            const GGUF_TYPE_Q8_0: u32 = 8;
            let result = expert_swiglu_cuda(
                &mut executor,
                &dummy_bytes,
                GGUF_TYPE_Q8_0,
                &dummy_bytes,
                GGUF_TYPE_Q4_K,
                &dummy_bytes,
                GGUF_TYPE_Q6_K,
                &hidden,
                4,
                10,
            );
            assert!(
                matches!(result, Err(RealizarError::UnsupportedOperation { .. })),
                "expected UnsupportedOperation for unknown gate qtype, got {result:?}"
            );
        }
    }

    /// Compile-time check: assigning the function to an explicit `fn` pointer type fails to
    /// build if the parameter list (including the three `u32` qtype parameters) changes.
    /// Without the `cuda` feature the body is empty.
    #[test]
    fn expert_swiglu_cuda_signature_has_three_qtype_params() {
        #[cfg(feature = "cuda")]
        let _f: fn(
            &mut crate::cuda::CudaExecutor,
            &[u8],
            u32,
            &[u8],
            u32,
            &[u8],
            u32,
            &[f32],
            usize,
            usize,
        ) -> Result<Vec<f32>> = expert_swiglu_cuda;
    }
}