#[cfg(target_arch = "x86_64")]
mod avx2;
#[cfg(target_arch = "x86_64")]
mod avx512;
#[cfg(target_arch = "x86_64")]
pub mod codegen;
mod neon;
#[cfg(target_arch = "x86_64")]
pub use avx2::{
microkernel_8x6_avx2, microkernel_8x6_avx2_asm, microkernel_8x6_true_asm,
microkernel_8x8_avx2_fma,
};
#[cfg(target_arch = "x86_64")]
pub use avx512::{microkernel_16x8_avx512, microkernel_32x6_avx512};
#[cfg(target_arch = "aarch64")]
pub use neon::microkernel_8x8_neon;
use super::{MR, NR};
/// Portable scalar micro-kernel that accumulates an `MR`-by-`NR` tile into `c`.
///
/// For every `p` in `0..k`, `jr` in `0..NR` and `ir` in `0..MR` it performs
/// `c[jr * ldc + ir] += a[p * MR + ir] * b[p * NR + jr]`.
///
/// # Arguments
///
/// * `k` - number of accumulation steps.
/// * `a` - read in groups of `MR` consecutive values, one group per step.
/// * `b` - read in groups of `NR` consecutive values, one group per step.
/// * `c` - output tile, updated in place; existing values are added to, not
///   overwritten.
/// * `ldc` - distance, in elements, between the start of consecutive `jr`
///   groups inside `c`.
///
/// # Panics
///
/// Panics if any accessed index lies outside `a`, `b` or `c`.
#[inline(never)]
pub fn microkernel_scalar(k: usize, a: &[f32], b: &[f32], c: &mut [f32], ldc: usize) {
    for step in 0..k {
        // Offsets of this step's group inside the two input buffers.
        let a_base = step * MR;
        let b_base = step * NR;
        for col in 0..NR {
            // One value of `b` scales a whole `MR`-long run of `a`.
            let scale = b[b_base + col];
            let c_base = col * ldc;
            for row in 0..MR {
                c[c_base + row] += a[a_base + row] * scale;
            }
        }
    }
}