use crate::error::Result;
use numr::runtime::Runtime;
use numr::tensor::Tensor;
/// Backend operations that take several quantized-weight projections in a single call.
///
/// The trait only declares signatures; what each operation computes is defined by the
/// backend that implements it for runtime `R`. Judging by the method and parameter names,
/// the weights are INT4-quantized and accompanied by per-group scales and zero points.
/// NOTE(review): confirm the exact packing, layout and dtypes against the implementations.
// Each projection contributes its own (qweight, scales, zeros) triple, so the long
// parameter lists are inherent to the interface rather than something to refactor away.
#[allow(clippy::too_many_arguments)]
pub trait FusedQuantOps<R: Runtime> {
    /// Runs the fused operation over a "gate" and an "up" quantized projection of `input`.
    ///
    /// Presumably this is a SwiGLU feed-forward step evaluated directly on INT4 weights
    /// (inferred from the name only — confirm against the backend implementations).
    ///
    /// # Parameters
    ///
    /// * `input` - tensor the projections are applied to.
    /// * `gate_qweight`, `gate_scales`, `gate_zeros` - quantized weight, scales and zero
    ///   points of the gate projection.
    /// * `up_qweight`, `up_scales`, `up_zeros` - quantized weight, scales and zero points
    ///   of the up projection.
    /// * `group_size` - quantization group size; presumably the number of weights that
    ///   share one scale / zero-point entry (TODO confirm).
    ///
    /// # Errors
    ///
    /// Returns whatever error the implementing backend reports.
    fn fused_int4_swiglu(
        &self,
        input: &Tensor<R>,
        gate_qweight: &Tensor<R>,
        gate_scales: &Tensor<R>,
        gate_zeros: &Tensor<R>,
        up_qweight: &Tensor<R>,
        up_scales: &Tensor<R>,
        up_zeros: &Tensor<R>,
        group_size: usize,
    ) -> Result<Tensor<R>>;

    /// Runs the fused operation over three quantized projections of `input` and returns
    /// three tensors.
    ///
    /// Going by the `_q` / `_k` / `_v` suffixes, the projections are presumably the query,
    /// key and value projections of an attention layer, and the returned tuple presumably
    /// follows the same (q, k, v) order as the parameters — confirm against the backend
    /// implementations.
    ///
    /// # Parameters
    ///
    /// * `input` - tensor the projections are applied to.
    /// * `qweight_q`, `scales_q`, `zeros_q` - quantized weight, scales and zero points of
    ///   the first projection.
    /// * `qweight_k`, `scales_k`, `zeros_k` - the same triple for the second projection.
    /// * `qweight_v`, `scales_v`, `zeros_v` - the same triple for the third projection.
    /// * `group_size` - quantization group size; presumably the number of weights that
    ///   share one scale / zero-point entry (TODO confirm).
    ///
    /// # Errors
    ///
    /// Returns whatever error the implementing backend reports.
    fn fused_int4_qkv(
        &self,
        input: &Tensor<R>,
        qweight_q: &Tensor<R>,
        scales_q: &Tensor<R>,
        zeros_q: &Tensor<R>,
        qweight_k: &Tensor<R>,
        scales_k: &Tensor<R>,
        zeros_k: &Tensor<R>,
        qweight_v: &Tensor<R>,
        scales_v: &Tensor<R>,
        zeros_v: &Tensor<R>,
        group_size: usize,
    ) -> Result<(Tensor<R>, Tensor<R>, Tensor<R>)>;
}