binary_matrix 0.1.4

Dense binary matrix operations.
Documentation
/// Based on Hacker's Delight (1st edition), Figure 7-3, unrolled.
macro_rules! swap {
    ( $x:expr, $y:expr, $j:expr, $m:expr ) => {
        let t = ($y ^ ($x >> $j)) & $m;
        $y ^= t;
        $x ^= t << $j;
    };
}

#[allow(dead_code)]
pub(crate) fn transpose_unroll_64x64(a: &mut [u64; 64]) {
    let mut a0 = a[0];
    let mut a1 = a[1];
    let mut a2 = a[2];
    let mut a3 = a[3];
    let mut a4 = a[4];
    let mut a5 = a[5];
    let mut a6 = a[6];
    let mut a7 = a[7];
    let mut a8 = a[8];
    let mut a9 = a[9];
    let mut a10 = a[10];
    let mut a11 = a[11];
    let mut a12 = a[12];
    let mut a13 = a[13];
    let mut a14 = a[14];
    let mut a15 = a[15];
    let mut a16 = a[16];
    let mut a17 = a[17];
    let mut a18 = a[18];
    let mut a19 = a[19];
    let mut a20 = a[20];
    let mut a21 = a[21];
    let mut a22 = a[22];
    let mut a23 = a[23];
    let mut a24 = a[24];
    let mut a25 = a[25];
    let mut a26 = a[26];
    let mut a27 = a[27];
    let mut a28 = a[28];
    let mut a29 = a[29];
    let mut a30 = a[30];
    let mut a31 = a[31];
    let mut a32 = a[32];
    let mut a33 = a[33];
    let mut a34 = a[34];
    let mut a35 = a[35];
    let mut a36 = a[36];
    let mut a37 = a[37];
    let mut a38 = a[38];
    let mut a39 = a[39];
    let mut a40 = a[40];
    let mut a41 = a[41];
    let mut a42 = a[42];
    let mut a43 = a[43];
    let mut a44 = a[44];
    let mut a45 = a[45];
    let mut a46 = a[46];
    let mut a47 = a[47];
    let mut a48 = a[48];
    let mut a49 = a[49];
    let mut a50 = a[50];
    let mut a51 = a[51];
    let mut a52 = a[52];
    let mut a53 = a[53];
    let mut a54 = a[54];
    let mut a55 = a[55];
    let mut a56 = a[56];
    let mut a57 = a[57];
    let mut a58 = a[58];
    let mut a59 = a[59];
    let mut a60 = a[60];
    let mut a61 = a[61];
    let mut a62 = a[62];
    let mut a63 = a[63];

    let m = 0xffffffffu64;
    swap!(a0, a32, 32, m);
    swap!(a1, a33, 32, m);
    swap!(a2, a34, 32, m);
    swap!(a3, a35, 32, m);
    swap!(a4, a36, 32, m);
    swap!(a5, a37, 32, m);
    swap!(a6, a38, 32, m);
    swap!(a7, a39, 32, m);
    swap!(a8, a40, 32, m);
    swap!(a9, a41, 32, m);
    swap!(a10, a42, 32, m);
    swap!(a11, a43, 32, m);
    swap!(a12, a44, 32, m);
    swap!(a13, a45, 32, m);
    swap!(a14, a46, 32, m);
    swap!(a15, a47, 32, m);
    swap!(a16, a48, 32, m);
    swap!(a17, a49, 32, m);
    swap!(a18, a50, 32, m);
    swap!(a19, a51, 32, m);
    swap!(a20, a52, 32, m);
    swap!(a21, a53, 32, m);
    swap!(a22, a54, 32, m);
    swap!(a23, a55, 32, m);
    swap!(a24, a56, 32, m);
    swap!(a25, a57, 32, m);
    swap!(a26, a58, 32, m);
    swap!(a27, a59, 32, m);
    swap!(a28, a60, 32, m);
    swap!(a29, a61, 32, m);
    swap!(a30, a62, 32, m);
    swap!(a31, a63, 32, m);

    let m = 0xffff0000ffffu64;
    swap!(a0, a16, 16, m);
    swap!(a1, a17, 16, m);
    swap!(a2, a18, 16, m);
    swap!(a3, a19, 16, m);
    swap!(a4, a20, 16, m);
    swap!(a5, a21, 16, m);
    swap!(a6, a22, 16, m);
    swap!(a7, a23, 16, m);
    swap!(a8, a24, 16, m);
    swap!(a9, a25, 16, m);
    swap!(a10, a26, 16, m);
    swap!(a11, a27, 16, m);
    swap!(a12, a28, 16, m);
    swap!(a13, a29, 16, m);
    swap!(a14, a30, 16, m);
    swap!(a15, a31, 16, m);
    swap!(a32, a48, 16, m);
    swap!(a33, a49, 16, m);
    swap!(a34, a50, 16, m);
    swap!(a35, a51, 16, m);
    swap!(a36, a52, 16, m);
    swap!(a37, a53, 16, m);
    swap!(a38, a54, 16, m);
    swap!(a39, a55, 16, m);
    swap!(a40, a56, 16, m);
    swap!(a41, a57, 16, m);
    swap!(a42, a58, 16, m);
    swap!(a43, a59, 16, m);
    swap!(a44, a60, 16, m);
    swap!(a45, a61, 16, m);
    swap!(a46, a62, 16, m);
    swap!(a47, a63, 16, m);

    let m = 0xff00ff00ff00ffu64;
    swap!(a0, a8, 8, m);
    swap!(a1, a9, 8, m);
    swap!(a2, a10, 8, m);
    swap!(a3, a11, 8, m);
    swap!(a4, a12, 8, m);
    swap!(a5, a13, 8, m);
    swap!(a6, a14, 8, m);
    swap!(a7, a15, 8, m);
    swap!(a16, a24, 8, m);
    swap!(a17, a25, 8, m);
    swap!(a18, a26, 8, m);
    swap!(a19, a27, 8, m);
    swap!(a20, a28, 8, m);
    swap!(a21, a29, 8, m);
    swap!(a22, a30, 8, m);
    swap!(a23, a31, 8, m);
    swap!(a32, a40, 8, m);
    swap!(a33, a41, 8, m);
    swap!(a34, a42, 8, m);
    swap!(a35, a43, 8, m);
    swap!(a36, a44, 8, m);
    swap!(a37, a45, 8, m);
    swap!(a38, a46, 8, m);
    swap!(a39, a47, 8, m);
    swap!(a48, a56, 8, m);
    swap!(a49, a57, 8, m);
    swap!(a50, a58, 8, m);
    swap!(a51, a59, 8, m);
    swap!(a52, a60, 8, m);
    swap!(a53, a61, 8, m);
    swap!(a54, a62, 8, m);
    swap!(a55, a63, 8, m);

    let m = 0xf0f0f0f0f0f0f0fu64;
    swap!(a0, a4, 4, m);
    swap!(a1, a5, 4, m);
    swap!(a2, a6, 4, m);
    swap!(a3, a7, 4, m);
    swap!(a8, a12, 4, m);
    swap!(a9, a13, 4, m);
    swap!(a10, a14, 4, m);
    swap!(a11, a15, 4, m);
    swap!(a16, a20, 4, m);
    swap!(a17, a21, 4, m);
    swap!(a18, a22, 4, m);
    swap!(a19, a23, 4, m);
    swap!(a24, a28, 4, m);
    swap!(a25, a29, 4, m);
    swap!(a26, a30, 4, m);
    swap!(a27, a31, 4, m);
    swap!(a32, a36, 4, m);
    swap!(a33, a37, 4, m);
    swap!(a34, a38, 4, m);
    swap!(a35, a39, 4, m);
    swap!(a40, a44, 4, m);
    swap!(a41, a45, 4, m);
    swap!(a42, a46, 4, m);
    swap!(a43, a47, 4, m);
    swap!(a48, a52, 4, m);
    swap!(a49, a53, 4, m);
    swap!(a50, a54, 4, m);
    swap!(a51, a55, 4, m);
    swap!(a56, a60, 4, m);
    swap!(a57, a61, 4, m);
    swap!(a58, a62, 4, m);
    swap!(a59, a63, 4, m);

    let m = 0x3333333333333333;
    swap!(a0, a2, 2, m);
    swap!(a1, a3, 2, m);
    swap!(a4, a6, 2, m);
    swap!(a5, a7, 2, m);
    swap!(a8, a10, 2, m);
    swap!(a9, a11, 2, m);
    swap!(a12, a14, 2, m);
    swap!(a13, a15, 2, m);
    swap!(a16, a18, 2, m);
    swap!(a17, a19, 2, m);
    swap!(a20, a22, 2, m);
    swap!(a21, a23, 2, m);
    swap!(a24, a26, 2, m);
    swap!(a25, a27, 2, m);
    swap!(a28, a30, 2, m);
    swap!(a29, a31, 2, m);
    swap!(a32, a34, 2, m);
    swap!(a33, a35, 2, m);
    swap!(a36, a38, 2, m);
    swap!(a37, a39, 2, m);
    swap!(a40, a42, 2, m);
    swap!(a41, a43, 2, m);
    swap!(a44, a46, 2, m);
    swap!(a45, a47, 2, m);
    swap!(a48, a50, 2, m);
    swap!(a49, a51, 2, m);
    swap!(a52, a54, 2, m);
    swap!(a53, a55, 2, m);
    swap!(a56, a58, 2, m);
    swap!(a57, a59, 2, m);
    swap!(a60, a62, 2, m);
    swap!(a61, a63, 2, m);
    let m = 0x5555555555555555;
    swap!(a0, a1, 1, m);
    swap!(a2, a3, 1, m);
    swap!(a4, a5, 1, m);
    swap!(a6, a7, 1, m);
    swap!(a8, a9, 1, m);
    swap!(a10, a11, 1, m);
    swap!(a12, a13, 1, m);
    swap!(a14, a15, 1, m);
    swap!(a16, a17, 1, m);
    swap!(a18, a19, 1, m);
    swap!(a20, a21, 1, m);
    swap!(a22, a23, 1, m);
    swap!(a24, a25, 1, m);
    swap!(a26, a27, 1, m);
    swap!(a28, a29, 1, m);
    swap!(a30, a31, 1, m);
    swap!(a32, a33, 1, m);
    swap!(a34, a35, 1, m);
    swap!(a36, a37, 1, m);
    swap!(a38, a39, 1, m);
    swap!(a40, a41, 1, m);
    swap!(a42, a43, 1, m);
    swap!(a44, a45, 1, m);
    swap!(a46, a47, 1, m);
    swap!(a48, a49, 1, m);
    swap!(a50, a51, 1, m);
    swap!(a52, a53, 1, m);
    swap!(a54, a55, 1, m);
    swap!(a56, a57, 1, m);
    swap!(a58, a59, 1, m);
    swap!(a60, a61, 1, m);
    swap!(a62, a63, 1, m);

    a[0] = a0;
    a[1] = a1;
    a[2] = a2;
    a[3] = a3;
    a[4] = a4;
    a[5] = a5;
    a[6] = a6;
    a[7] = a7;
    a[8] = a8;
    a[9] = a9;
    a[10] = a10;
    a[11] = a11;
    a[12] = a12;
    a[13] = a13;
    a[14] = a14;
    a[15] = a15;
    a[16] = a16;
    a[17] = a17;
    a[18] = a18;
    a[19] = a19;
    a[20] = a20;
    a[21] = a21;
    a[22] = a22;
    a[23] = a23;
    a[24] = a24;
    a[25] = a25;
    a[26] = a26;
    a[27] = a27;
    a[28] = a28;
    a[29] = a29;
    a[30] = a30;
    a[31] = a31;
    a[32] = a32;
    a[33] = a33;
    a[34] = a34;
    a[35] = a35;
    a[36] = a36;
    a[37] = a37;
    a[38] = a38;
    a[39] = a39;
    a[40] = a40;
    a[41] = a41;
    a[42] = a42;
    a[43] = a43;
    a[44] = a44;
    a[45] = a45;
    a[46] = a46;
    a[47] = a47;
    a[48] = a48;
    a[49] = a49;
    a[50] = a50;
    a[51] = a51;
    a[52] = a52;
    a[53] = a53;
    a[54] = a54;
    a[55] = a55;
    a[56] = a56;
    a[57] = a57;
    a[58] = a58;
    a[59] = a59;
    a[60] = a60;
    a[61] = a61;
    a[62] = a62;
    a[63] = a63;
}

#[cfg(test)]
#[cfg(feature = "rand")]
mod tests {
    use crate::base_matrix::slow_transpose;
    use crate::transpose64x64_unroll::transpose_unroll_64x64;
    use crate::BinaryMatrix64;
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;

    #[test]
    fn test_transpose_64x64_unroll() {
        let mut rng = ChaCha8Rng::seed_from_u64(1234);
        let mat = BinaryMatrix64::random(64, 64, &mut rng);
        let mut newmat = BinaryMatrix64::new();
        let mut a = mat.submatrix64(0, 0).cols;
        transpose_unroll_64x64(&mut a);
        slow_transpose(mat.as_ref(), &mut newmat);
        let b = newmat.submatrix64(0, 0).cols;
        assert_eq!(a, b);
    }
}

#[cfg(test)]
mod bench {
    extern crate test;
    use crate::transpose64x64_unroll::transpose_unroll_64x64;
    use test::bench::Bencher;

    #[bench]
    fn bench_transpose64x64_unroll(b: &mut Bencher) {
        let mut mat = [0u64; 64];
        mat[0] = 0xffffffffffffffff;
        b.iter(|| {
            test::black_box(transpose_unroll_64x64(&mut mat));
        });
    }
}