tensorism 0.5.1

use criterion::{Criterion, criterion_group, criterion_main};
use ndarray::{Array1, Array2, Array3, Axis, Zip};
use std::hint::black_box;
use tensorism::new_ndarray;
use utils::C64;

fn tensorism(
    t_a: &Array2<f64>,
    t_b: &Array1<C64>,
    t_c: &Array2<f64>,
    t_d: &Array1<i32>,
) -> Array3<f64> {
    new_ndarray! {
        for i j k => t_a[j, i] * t_c[i, k] + t_b[k].re() * t_b[k].re() + (t_d[j] as f64).sin()
    }
}

fn flatten_low(
    t_a: &Array2<f64>,
    t_b: &Array1<C64>,
    t_c: &Array2<f64>,
    t_d: &Array1<i32>,
) -> Array3<f64> {
    let dim_number_0 = ::ndarray::ArrayBase::<_, _>::dim(&t_a).1;
    if dim_number_0 != ::ndarray::ArrayBase::<_, _>::dim(&t_c).0 {
        panic!("Dimensions are not matching between t_a[_, i] and t_c[i, _]");
    }
    let dim_number_1 = ::ndarray::ArrayBase::<_, _>::dim(&t_a).0;
    if dim_number_1 != ::ndarray::ArrayBase::<_, _>::dim(&t_d) {
        panic!("Dimensions are not matching between t_a[j, _] and t_d[j]");
    }
    let dim_number_2 = ::ndarray::ArrayBase::<_, _>::dim(&t_c).1;
    if dim_number_2 != ::ndarray::ArrayBase::<_, _>::dim(&t_b) {
        panic!("Dimensions are not matching between t_c[_, k] and t_b[k]");
    }
    if dim_number_2 != ::ndarray::ArrayBase::<_, _>::dim(&t_b) {
        panic!("Dimensions are not matching between t_c[_, k] and t_b[k]");
    }
    let strides_a = t_a.strides();
    if strides_a != [dim_number_0 as isize, 1isize] {
        panic!("t_a has unexpected memory layout {:?}", strides_a);
    }
    let p_a = t_a.as_ptr();
    let strides_b = t_b.strides();
    if strides_b != [1isize] {
        panic!("t_b has unexpected memory layout {:?}", strides_b);
    }
    let p_b = t_b.as_ptr();
    let strides_c = t_c.strides();
    if strides_c != [dim_number_2 as isize, 1isize] {
        panic!("t_c has unexpected memory layout {:?}", strides_c);
    }
    let p_c = t_c.as_ptr();
    let strides_d = t_d.strides();
    if strides_d != [1isize] {
        panic!("t_d has unexpected memory layout {:?}", strides_d);
    }
    let p_d = t_d.as_ptr();

    let mut result = ::ndarray::Array::<f64, ::ndarray::Dim<[::ndarray::Ix; 3usize]>>::uninit((
        dim_number_0,
        dim_number_1,
        dim_number_2,
    ));
    let mut ptr_on_result = result.as_mut_ptr() as *mut f64;
    for i in 0..dim_number_0 {
        for j in 0..dim_number_1 {
            for k in 0..dim_number_2 {
                let value = (unsafe { *p_a.add(j * dim_number_0 + i) })
                    * (unsafe { *p_c.add(i * dim_number_2 + k) })
                    + (unsafe { *p_b.add(k) }).re() * (unsafe { *p_b.add(k) }).re()
                    + ((unsafe { *p_d.add(j) }) as f64).sin();
                unsafe {
                    ptr_on_result.write(value);
                    ptr_on_result = ptr_on_result.add(1);
                }
            }
        }
    }
    unsafe { result.assume_init() }
}

fn flatten_high(
    t_a: &Array2<f64>,
    t_b: &Array1<C64>,
    t_c: &Array2<f64>,
    t_d: &Array1<i32>,
) -> Array3<f64> {
    let dim_number_0 = ::ndarray::ArrayBase::<_, _>::dim(&t_a).1;
    if dim_number_0 != ::ndarray::ArrayBase::<_, _>::dim(&t_c).0 {
        panic!("Dimensions are not matching between t_a[_, i] and t_c[i, _]");
    }
    let dim_number_1 = ::ndarray::ArrayBase::<_, _>::dim(&t_a).0;
    if dim_number_1 != ::ndarray::ArrayBase::<_, _>::dim(&t_d) {
        panic!("Dimensions are not matching between t_a[j, _] and t_d[j]");
    }
    let dim_number_2 = ::ndarray::ArrayBase::<_, _>::dim(&t_c).1;
    if dim_number_2 != ::ndarray::ArrayBase::<_, _>::dim(&t_b) {
        panic!("Dimensions are not matching between t_c[_, k] and t_b[k]");
    }
    if dim_number_2 != ::ndarray::ArrayBase::<_, _>::dim(&t_b) {
        panic!("Dimensions are not matching between t_c[_, k] and t_b[k]");
    }
    let strides_a = t_a.strides();
    if strides_a != [dim_number_0 as isize, 1isize] {
        panic!("t_a has unexpected memory layout {:?}", strides_a);
    }
    let p_a = t_a.as_ptr();
    let strides_b = t_b.strides();
    if strides_b != [1isize] {
        panic!("t_b has unexpected memory layout {:?}", strides_b);
    }
    let p_b = t_b.as_ptr();
    let strides_c = t_c.strides();
    if strides_c != [dim_number_2 as isize, 1isize] {
        panic!("t_c has unexpected memory layout {:?}", strides_c);
    }
    let p_c = t_c.as_ptr();
    let strides_d = t_d.strides();
    if strides_d != [1isize] {
        panic!("t_d has unexpected memory layout {:?}", strides_d);
    }
    let p_d = t_d.as_ptr();

    let mut result = ::ndarray::Array::<f64, ::ndarray::Dim<[::ndarray::Ix; 3usize]>>::uninit((
        dim_number_0,
        dim_number_1,
        dim_number_2,
    ));
    let mut ptr_on_result = result.as_mut_ptr() as *mut f64;
    for i in 0..dim_number_0 {
        let i_c = i * dim_number_2;
        for j in 0..dim_number_1 {
            let i_a = j * dim_number_0 + i;
            for k in 0..dim_number_2 {
                let value = (unsafe { *p_a.add(i_a) }) * (unsafe { *p_c.add(i_c + k) })
                    + (unsafe { *p_b.add(k) }).re() * (unsafe { *p_b.add(k) }).re()
                    + ((unsafe { *p_d.add(j) }) as f64).sin();
                unsafe {
                    ptr_on_result.write(value);
                    ptr_on_result = ptr_on_result.add(1);
                }
            }
        }
    }
    unsafe { result.assume_init() }
}

fn flatten_var(
    t_a: &Array2<f64>,
    t_b: &Array1<C64>,
    t_c: &Array2<f64>,
    t_d: &Array1<i32>,
) -> Array3<f64> {
    let dim_number_0 = ::ndarray::ArrayBase::<_, _>::dim(&t_a).1;
    if dim_number_0 != ::ndarray::ArrayBase::<_, _>::dim(&t_c).0 {
        panic!("Dimensions are not matching between t_a[_, i] and t_c[i, _]");
    }
    let dim_number_1 = ::ndarray::ArrayBase::<_, _>::dim(&t_a).0;
    if dim_number_1 != ::ndarray::ArrayBase::<_, _>::dim(&t_d) {
        panic!("Dimensions are not matching between t_a[j, _] and t_d[j]");
    }
    let dim_number_2 = ::ndarray::ArrayBase::<_, _>::dim(&t_c).1;
    if dim_number_2 != ::ndarray::ArrayBase::<_, _>::dim(&t_b) {
        panic!("Dimensions are not matching between t_c[_, k] and t_b[k]");
    }
    if dim_number_2 != ::ndarray::ArrayBase::<_, _>::dim(&t_b) {
        panic!("Dimensions are not matching between t_c[_, k] and t_b[k]");
    }
    let strides_a = t_a.strides();
    if strides_a != [dim_number_0 as isize, 1isize] {
        panic!("t_a has unexpected memory layout {:?}", strides_a);
    }
    let p_a = t_a.as_ptr();
    let strides_b = t_b.strides();
    if strides_b != [1isize] {
        panic!("t_b has unexpected memory layout {:?}", strides_b);
    }
    let p_b = t_b.as_ptr();
    let strides_c = t_c.strides();
    if strides_c != [dim_number_2 as isize, 1isize] {
        panic!("t_c has unexpected memory layout {:?}", strides_c);
    }
    let p_c = t_c.as_ptr();
    let strides_d = t_d.strides();
    if strides_d != [1isize] {
        panic!("t_d has unexpected memory layout {:?}", strides_d);
    }
    let p_d = t_d.as_ptr();

    let mut result = ::ndarray::Array::<f64, ::ndarray::Dim<[::ndarray::Ix; 3usize]>>::uninit((
        dim_number_0,
        dim_number_1,
        dim_number_2,
    ));
    let mut ptr_on_result = result.as_mut_ptr() as *mut f64;
    let mut i_1_a: usize = 0;
    let mut i_1_c: usize = 0;
    for _i in 0..dim_number_0 {
        let mut i_2_a: usize = i_1_a;
        let mut i_2_d: usize = 0;
        for _j in 0..dim_number_1 {
            let mut i_3_b: usize = 0;
            let mut i_3_c: usize = i_1_c;
            for _k in 0..dim_number_2 {
                let value = (unsafe { *p_a.add(i_2_a) }) * (unsafe { *p_c.add(i_3_c) })
                    + (unsafe { *p_b.add(i_3_b) }).re() * (unsafe { *p_b.add(i_3_b) }).re()
                    + ((unsafe { *p_d.add(i_2_d) }) as f64).sin();
                unsafe {
                    ptr_on_result.write(value);
                    ptr_on_result = ptr_on_result.add(1);
                }
                i_3_b += 1;
                i_3_c += 1;
            }
            i_2_a += dim_number_0;
            i_2_d += 1;
        }
        i_1_a += 1;
        i_1_c += dim_number_2;
    }
    unsafe { result.assume_init() }
}

fn pure_ndarray(
    t_a: &Array2<f64>,
    t_b: &Array1<C64>,
    t_c: &Array2<f64>,
    t_d: &Array1<i32>,
) -> Array3<f64> {
    let (ni, nj, nk) = (128, 64, 32);

    let mut r = Array3::<_>::uninit((ni, nj, nk));

    let t_a_perm = t_a.view().permuted_axes([1, 0]);
    let t_a_ins = t_a_perm.insert_axis(Axis(2));
    let t_a_v = t_a_ins.broadcast((ni, nj, nk)).unwrap();

    let t_b_ins0 = t_b.view().insert_axis(Axis(0));
    let t_b_ins1 = t_b_ins0.insert_axis(Axis(1));
    let t_b_v = t_b_ins1.broadcast((ni, nj, nk)).unwrap();

    let t_c_ins = t_c.view().insert_axis(Axis(1));
    let t_c_v = t_c_ins.broadcast((ni, nj, nk)).unwrap();

    let t_d_ins0 = t_d.view().insert_axis(Axis(0));
    let t_d_ins1 = t_d_ins0.insert_axis(Axis(2));
    let t_d_v = t_d_ins1.broadcast((ni, nj, nk)).unwrap();

    Zip::from(r.view_mut())
        .and(t_a_v)
        .and(t_b_v)
        .and(t_c_v)
        .and(t_d_v)
        .for_each(|r_cell, &a, &b, &c, &d| {
            r_cell.write(a * c + b.re() * b.re() + (d as f64).sin());
        });

    unsafe { r.assume_init() }
}

fn benchmark(c: &mut Criterion) {
    let (ni, nj, nk) = (128, 64, 32);
    let t_a: Array2<f64> = Array2::from_shape_fn((nj, ni), |(j, i)| {
        1.7 * ((i as f64) - (j as f64) + 0.5).cos()
    });
    let t_b: Array1<C64> =
        Array1::from_shape_fn(nk, |k| C64::from_f64(3.5 * (k as f64 + 2.0).sin()));
    let t_c: Array2<f64> = Array2::from_shape_fn((ni, nk), |(i, k)| ((i * k) as f64 + 10.0).ln());
    let t_d: Array1<i32> = Array1::from_shape_fn(nj, |j| j as i32);

    let result_ndarray = pure_ndarray(&t_a, &t_b, &t_c, &t_d);
    let result_tensorism = tensorism(&t_a, &t_b, &t_c, &t_d);
    let result_flatten_low = flatten_low(&t_a, &t_b, &t_c, &t_d);
    let result_flatten_var = flatten_var(&t_a, &t_b, &t_c, &t_d);
    let result_flatten_high = flatten_high(&t_a, &t_b, &t_c, &t_d);
    assert_eq!(result_tensorism, result_flatten_low);
    assert_eq!(result_tensorism, result_flatten_var);
    assert_eq!(result_tensorism, result_flatten_high);
    assert_eq!(result_ndarray, result_tensorism);

    c.bench_function("Mixed array - Ndarray", |b| {
        b.iter(|| {
            pure_ndarray(
                black_box(&t_a),
                black_box(&t_b),
                black_box(&t_c),
                black_box(&t_d),
            )
        })
    });
    c.bench_function("Mixed array - Tensorism", |b| {
        b.iter(|| {
            tensorism(
                black_box(&t_a),
                black_box(&t_b),
                black_box(&t_c),
                black_box(&t_d),
            )
        })
    });
    c.bench_function("Mixed array - Flatten Low", |b| {
        b.iter(|| {
            flatten_low(
                black_box(&t_a),
                black_box(&t_b),
                black_box(&t_c),
                black_box(&t_d),
            )
        })
    });
    c.bench_function("Mixed array - Flatten Var", |b| {
        b.iter(|| {
            flatten_var(
                black_box(&t_a),
                black_box(&t_b),
                black_box(&t_c),
                black_box(&t_d),
            )
        })
    });
    c.bench_function("Mixed array - Flatten High", |b| {
        b.iter(|| {
            flatten_high(
                black_box(&t_a),
                black_box(&t_b),
                black_box(&t_c),
                black_box(&t_d),
            )
        })
    });
}

criterion_group!(benches, benchmark);
criterion_main!(benches);