pub mod batched_gemv;
pub mod f16_accumulator;
pub mod q1_0_g128;
pub mod q4_0;
pub mod q4_k;
pub mod q5_k;
pub mod q6_k;
pub mod q8_0;
pub use batched_gemv::{batched_gemv_f32, BatchedGemvConfig, BatchedGpuKernel};
#[cfg(any(feature = "gpu", test))]
pub use f16_accumulator::{dequant_q4_0_to_f16, dequant_q8_0_to_f16};
#[cfg(feature = "gpu")]
pub use f16_accumulator::{f16_gemv, upload_f16};
pub use f16_accumulator::{supports_f16, F16AccumulatorConfig};
pub use q1_0_g128::Q1_0_G128GpuKernel;
pub use q4_0::Q4_0GpuKernel;
pub use q4_k::Q4_KGpuKernel;
pub use q5_k::Q5_KGpuKernel;
pub use q6_k::Q6_KGpuKernel;
pub use q8_0::Q8_0GpuKernel;
use crate::context::GpuContext;
use crate::error::GpuResult;
/// Shared interface for GPU kernels exposing a `gemv` entry point.
///
/// Implementors are required to be `Send + Sync`, so a kernel value can be
/// moved to, and referenced from, other threads.
///
/// NOTE(review): the per-format kernel types re-exported next to this trait
/// (for example `Q4_0GpuKernel`) presumably implement it — confirm in their
/// respective modules.
pub trait GpuKernel: Send + Sync {
    /// Runs the kernel's `gemv` operation, writing its results into `output`.
    ///
    /// The name suggests a general matrix-vector multiply; the exact math and
    /// data layout are defined by each implementor.
    ///
    /// # Parameters
    ///
    /// * `ctx` - GPU context the call is issued against.
    /// * `weight_bytes` - Weights as raw bytes. Presumably encoded in the
    ///   implementor's own block format — TODO confirm per kernel.
    /// * `input` - Input values as `f32`. Presumably `cols` elements long —
    ///   TODO confirm.
    /// * `output` - Destination buffer, mutated in place. Presumably `rows`
    ///   elements long — TODO confirm.
    /// * `rows` - Row count. Presumably the weight matrix height — verify
    ///   against implementors.
    /// * `cols` - Column count. Presumably the weight matrix width — verify
    ///   against implementors.
    ///
    /// # Errors
    ///
    /// Returns the `Err` variant of `GpuResult` when the implementor reports
    /// a failure; the concrete conditions are implementor-specific.
    fn gemv(
        &self,
        ctx: &GpuContext,
        weight_bytes: &[u8],
        input: &[f32],
        output: &mut [f32],
        rows: usize,
        cols: usize,
    ) -> GpuResult<()>;
}