use crate::error::{RealizarError, Result};
use crate::gguf::qwen3_moe_load::Qwen3MoeQuantizedLayer;
use crate::gguf::OwnedQuantizedModel;
impl OwnedQuantizedModel {
    /// GPU forward pass for Qwen3 MoE models — M-GPU-MOE-1.0 stub.
    ///
    /// Validates its inputs and then unconditionally returns
    /// `RealizarError::UnsupportedOperation`; the actual GPU dispatch
    /// (stage M-GPU-MOE-1.1 and beyond) is not implemented yet. Callers
    /// should use the CPU path (`forward_qwen3_moe`) in the meantime.
    ///
    /// # Errors
    ///
    /// * `RealizarError::InvalidShape` — `token_ids` is empty, `moe_layers`
    ///   does not match the model's decoder-layer count, any of the three
    ///   MoE config values is zero, or `num_experts_per_tok > num_experts`.
    /// * `RealizarError::UnsupportedOperation` — always, once validation
    ///   passes (stub behavior).
    #[allow(clippy::too_many_arguments)]
    pub fn forward_qwen3_moe_gpu(
        &self,
        token_ids: &[u32],
        moe_layers: &[Qwen3MoeQuantizedLayer],
        num_experts: usize,
        num_experts_per_tok: usize,
        moe_intermediate: usize,
        _data: &[u8],
    ) -> Result<Vec<f32>> {
        // Every validation failure below maps to the same error variant.
        let invalid = |reason: String| RealizarError::InvalidShape { reason };

        if token_ids.is_empty() {
            return Err(invalid(
                "forward_qwen3_moe_gpu: token_ids must not be empty".to_string(),
            ));
        }

        let n_layers = self.layers.len();
        if moe_layers.len() != n_layers {
            return Err(invalid(format!(
                "forward_qwen3_moe_gpu: moe_layers.len() = {} but model has {} decoder layers",
                moe_layers.len(),
                n_layers
            )));
        }

        // All three MoE hyperparameters must come from GGUF metadata; a zero
        // in any of them means the caller passed an incomplete config.
        let config_incomplete =
            num_experts == 0 || num_experts_per_tok == 0 || moe_intermediate == 0;
        if config_incomplete {
            return Err(invalid(format!(
                "forward_qwen3_moe_gpu: incomplete MoE config — num_experts={num_experts}, \
                 num_experts_per_tok={num_experts_per_tok}, moe_intermediate={moe_intermediate}. \
                 Caller must supply all three from GGUF metadata."
            )));
        }

        if num_experts_per_tok > num_experts {
            return Err(invalid(format!(
                "forward_qwen3_moe_gpu: num_experts_per_tok ({num_experts_per_tok}) \
                 exceeds num_experts ({num_experts})"
            )));
        }

        // Inputs are well-formed, but the GPU kernel path does not exist yet.
        Err(RealizarError::UnsupportedOperation {
            operation: "forward_qwen3_moe_gpu".to_string(),
            reason: format!(
                "M-GPU-MOE-1.0 stub — see contracts/qwen3-moe-forward-gpu-v1.yaml \
                 for the in-flight implementation plan. \
                 Stages M-GPU-MOE-1.1 (per-expert GPU dispatch) and beyond \
                 are pending. Use forward_qwen3_moe (CPU LAZY-FUSED-MATVEC) for now. \
                 num_experts={num_experts}, num_experts_per_tok={num_experts_per_tok}, \
                 moe_intermediate={moe_intermediate}, layers={}",
                n_layers
            ),
        })
    }
}
#[cfg(test)]
mod tests {
use super::*;
// NOTE(review): this test body is empty, so it currently passes vacuously
// despite its name claiming an assertion. Exercising
// `forward_qwen3_moe_gpu` requires an `OwnedQuantizedModel` instance,
// which presumably needs a loaded GGUF fixture — TODO: once a small test
// model is available, assert that the stub returns
// `RealizarError::UnsupportedOperation` whose reason mentions
// contracts/qwen3-moe-forward-gpu-v1.yaml (the behavior the test name
// describes).
#[test]
fn forward_qwen3_moe_gpu_stub_returns_unsupported_pointing_at_contract() {
}
}