use crate::backend::Backend;
use ferrum_types::Result;
/// Interface for a linear layer that exposes several experts behind one object, generic
/// over a [`Backend`] `B`.
///
/// Implementors must be `Send + Sync`. Every compute method takes the backend context as
/// `&mut B::Context`, its inputs as shared `&B::Buffer` references, its destination as
/// `&mut B::Buffer`, and reports success or failure through `Result<()>`.
///
/// NOTE(review): the identifiers suggest GGUF-quantized, stacked mixture-of-experts weights
/// driven by GEMV/GEMM kernels that select experts by id. Only signatures are declared
/// here, so buffer layouts, the units of strides and offsets, and the error conditions
/// must be confirmed against the implementations.
pub trait StackedExpertGgufLinear<B>: Send + Sync
where
    B: Backend,
{
    /// Returns the number of experts held by this layer.
    fn num_experts(&self) -> usize;

    /// Returns the row count reported by this layer.
    ///
    /// NOTE(review): presumably rows of a single expert's weight matrix — confirm.
    fn n_rows(&self) -> usize;

    /// Returns the column count reported by this layer.
    ///
    /// NOTE(review): presumably columns of a single expert's weight matrix — confirm.
    fn n_cols(&self) -> usize;

    /// Exposes `self` as `&dyn Any` so a caller holding a trait object can downcast to
    /// the concrete implementation.
    fn as_any(&self) -> &dyn std::any::Any;

    /// Expert-selected GEMV: consumes `a` and `ids`, with `out` as the destination.
    ///
    /// NOTE(review): `n_selected` looks like the number of expert ids to use and
    /// `src1_stride` like the step between per-expert inputs in `a`; the unit (elements
    /// vs. bytes) is not visible here — confirm.
    fn gemv_moe_id(
        &self,
        ctx: &mut B::Context,
        a: &B::Buffer,
        ids: &B::Buffer,
        out: &mut B::Buffer,
        n_selected: usize,
        src1_stride: usize,
    ) -> Result<()>;

    /// Variant of [`Self::gemv_moe_id`] that additionally takes `a_offset` and
    /// `ids_offset`.
    ///
    /// NOTE(review): the offsets presumably mark where reading starts inside `a` and
    /// `ids`; their unit is not visible here — confirm.
    // The signature mirrors a kernel argument list, hence the argument-count lint opt-out.
    #[allow(clippy::too_many_arguments)]
    fn gemv_moe_id_offset(
        &self,
        ctx: &mut B::Context,
        a: &B::Buffer,
        a_offset: usize,
        ids: &B::Buffer,
        ids_offset: usize,
        out: &mut B::Buffer,
        n_selected: usize,
        src1_stride: usize,
    ) -> Result<()>;

    /// Expert-selected GEMV that takes a second layer, `other_up`, and has `silu_out`
    /// as its destination.
    ///
    /// NOTE(review): by its name this presumably treats `self` as the gate projection and
    /// `other_up` as the up projection, combining them with a SiLU activation — confirm.
    fn gemv_moe_id_gate_up_silu(
        &self,
        ctx: &mut B::Context,
        a: &B::Buffer,
        other_up: &dyn StackedExpertGgufLinear<B>,
        ids: &B::Buffer,
        silu_out: &mut B::Buffer,
        n_selected: usize,
    ) -> Result<()>;

    /// Batched form of the expert-selected GEMV, parameterised by `m` and `top_k`.
    ///
    /// NOTE(review): `m` is presumably the number of batched inputs and `top_k` the
    /// experts chosen per input, with the outer/inner strides describing how `a` is
    /// walked along those two axes — confirm.
    // The signature mirrors a kernel argument list, hence the argument-count lint opt-out.
    #[allow(clippy::too_many_arguments)]
    fn gemv_moe_id_batched(
        &self,
        ctx: &mut B::Context,
        a: &B::Buffer,
        ids: &B::Buffer,
        out: &mut B::Buffer,
        m: usize,
        top_k: usize,
        src1_outer_stride: usize,
        src1_inner_stride: usize,
    ) -> Result<()>;

    /// Batched form of [`Self::gemv_moe_id_gate_up_silu`], parameterised by `m`, `top_k`
    /// and the outer/inner strides.
    ///
    /// NOTE(review): parameter meanings are presumed to match
    /// [`Self::gemv_moe_id_batched`] — confirm.
    // The signature mirrors a kernel argument list, hence the argument-count lint opt-out.
    #[allow(clippy::too_many_arguments)]
    fn gemv_moe_id_gate_up_silu_batched(
        &self,
        ctx: &mut B::Context,
        a: &B::Buffer,
        other_up: &dyn StackedExpertGgufLinear<B>,
        ids: &B::Buffer,
        silu_out: &mut B::Buffer,
        m: usize,
        top_k: usize,
        src1_outer_stride: usize,
        src1_inner_stride: usize,
    ) -> Result<()>;

    /// Expert-selected GEMM: consumes `a`, `ids` and `tpe`, with `out` as the destination.
    ///
    /// NOTE(review): `tpe` presumably holds a per-expert token count bounded by
    /// `max_per_expert`; `ne11` follows ggml-style dimension naming. Neither is
    /// established by this file — confirm.
    // The signature mirrors a kernel argument list, hence the argument-count lint opt-out.
    #[allow(clippy::too_many_arguments)]
    fn gemm_moe_id(
        &self,
        ctx: &mut B::Context,
        a: &B::Buffer,
        ids: &B::Buffer,
        tpe: &B::Buffer,
        out: &mut B::Buffer,
        ne11: usize,
        top_k: usize,
        max_per_expert: usize,
        batch: usize,
    ) -> Result<()>;

    /// Variant of [`Self::gemm_moe_id`] that names its input `src1` and takes an extra
    /// `args_buf` buffer.
    ///
    /// NOTE(review): "indirect" together with `args_buf` suggests dispatch arguments are
    /// read from a buffer rather than supplied by the host — confirm.
    // The signature mirrors a kernel argument list, hence the argument-count lint opt-out.
    #[allow(clippy::too_many_arguments)]
    fn gemm_moe_id_indirect(
        &self,
        ctx: &mut B::Context,
        src1: &B::Buffer,
        ids: &B::Buffer,
        tpe: &B::Buffer,
        out: &mut B::Buffer,
        args_buf: &B::Buffer,
        ne11: usize,
        top_k: usize,
        max_per_expert: usize,
        batch: usize,
    ) -> Result<()>;
}