blasoxide 0.3.2

BLAS implementation in rust
Documentation
pub fn srotg(a: f32, b: f32) -> (f32, f32, f32, f32) {
    if a == 0.0 && b == 0.0 {
        return (0.0, 0.0, 1.0, 0.0);
    }
    let h = a.hypot(b);
    let r = if a.abs() > b.abs() {
        h.copysign(a)
    } else {
        h.copysign(b)
    };
    let c = a / r;
    let s = b / r;
    let z = if a.abs() > b.abs() {
        s
    } else if c != 0.0 {
        1.0 / c
    } else {
        1.0
    };
    (r, z, c, s)
}

pub unsafe fn srot(
    n: usize,
    mut x: *mut f32,
    incx: usize,
    mut y: *mut f32,
    incy: usize,
    c: f32,
    s: f32,
) {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("fma") {
            crate::fma::srot(n, x, incx, y, incy, c, s);
            return;
        }
    }

    for _ in 0..n {
        let xi = *x;
        let yi = *y;

        *x = c * xi + s * yi;
        *y = c * yi - s * xi;

        x = x.add(incx);
        y = y.add(incy);
    }
}

pub unsafe fn sswap(n: usize, mut x: *mut f32, incx: usize, mut y: *mut f32, incy: usize) {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("fma") {
            crate::fma::sswap(n, x, incx, y, incy);
            return;
        }
    }

    for _ in 0..n {
        let xi = *x;
        let yi = *y;

        *x = yi;
        *y = xi;

        x = x.add(incx);
        y = y.add(incy);
    }
}

pub unsafe fn sscal(n: usize, a: f32, mut x: *mut f32, incx: usize) {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("fma") {
            crate::fma::sscal(n, a, x, incx);
            return;
        }
    }

    for _ in 0..n {
        *x *= a;
        x = x.add(incx);
    }
}

pub unsafe fn scopy(n: usize, mut x: *const f32, incx: usize, mut y: *mut f32, incy: usize) {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("fma") {
            crate::fma::scopy(n, x, incx, y, incy);
            return;
        }
    }

    for _ in 0..n {
        *y = *x;
        x = x.add(incx);
        y = y.add(incy);
    }
}

pub unsafe fn saxpy(
    n: usize,
    a: f32,
    mut x: *const f32,
    incx: usize,
    mut y: *mut f32,
    incy: usize,
) {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("fma") {
            crate::fma::saxpy(n, a, x, incx, y, incy);
            return;
        }
    }
    for _ in 0..n {
        *y += a * *x;
        x = x.add(incx);
        y = y.add(incy);
    }
}

pub unsafe fn sdot(
    n: usize,
    mut x: *const f32,
    incx: usize,
    mut y: *const f32,
    incy: usize,
) -> f32 {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("fma") {
            return crate::fma::sdot(n, x, incx, y, incy);
        }
    }

    let mut acc = 0.0;
    for _ in 0..n {
        acc += *x * *y;
        x = x.add(incx);
        y = y.add(incy);
    }
    acc
}

pub unsafe fn sdsdot(
    n: usize,
    b: f32,
    mut x: *const f32,
    incx: usize,
    mut y: *const f32,
    incy: usize,
) -> f32 {
    let mut acc: f64 = f64::from(b);
    for _ in 0..n {
        acc += f64::from(*x) * f64::from(*y);
        x = x.add(incx);
        y = y.add(incy);
    }
    acc as f32
}

pub unsafe fn snrm2(n: usize, mut x: *const f32, incx: usize) -> f32 {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("fma") {
            return crate::fma::snrm2(n, x, incx);
        }
    }

    let mut acc = 0.0;
    for _ in 0..n {
        let xi = *x;
        acc += xi * xi;
        x = x.add(incx);
    }
    acc.sqrt()
}

pub unsafe fn sasum(n: usize, mut x: *const f32, incx: usize) -> f32 {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("fma") {
            return crate::fma::sasum(n, x, incx);
        }
    }

    let mut acc = 0.0;
    for _ in 0..n {
        acc += (*x).abs();
        x = x.add(incx);
    }
    acc
}

pub unsafe fn isamax(n: usize, mut x: *const f32, incx: usize) -> usize {
    let mut max = 0.0;
    let mut imax = 0;
    for i in 0..n {
        let xi = (*x).abs();
        if xi > max {
            max = xi;
            imax = i;
        }
        x = x.add(incx);
    }
    imax
}