1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
use crate::Bits;
use crate::IntoBits;

macro_rules! mask {
    ($Type:ty, $Range:expr) => {
        (((1 as $Type) << $Range.end()) - ((1 as $Type) << $Range.start()))
            | ((1 as $Type) << $Range.end())
    };
}

/// bits 的实际操作
pub trait BitsOps<T> {
    #[must_use = "set function dosen't modify the self in place, you should assign to it explicitly"]
    fn set(&self) -> T;
    
    #[must_use = "clr function dosen't modify the self in place, you should assign to it explicitly"]
    fn clr(&self) -> T;

    #[must_use = "revert function dosen't modify the self in place, you should assign to it explicitly"]
    fn revert(&self) -> T;

    #[must_use = "write function dosen't modify the self in place, you should assign to it explicitly"]
    fn write(&self, value: T) -> T;

    fn read(&self) -> T;
    fn is_clr(&self) -> bool;
    fn is_set(&self) -> bool;
    fn count_ones(&self) -> u32;
    fn msb(&self) -> bool;
    fn lsb(&self) -> bool;
}

macro_rules! impl_bitsops {
    ($($Type:ty) *) => {
        $(impl BitsOps<$Type> for Bits<$Type> {
            #[inline]
            fn msb(&self) -> bool {
                // let mask = mask!($Type, *self.range.end() ..= *self.range.end());
                // (self.value & mask) != 0
                self.value.bits(*self.range.end() ..= *self.range.end()).is_set()
            }
            #[inline]
            fn lsb(&self) -> bool {
                self.value.bits(*self.range.start() ..= *self.range.start()).is_set()
                // let mask = mask!($Type, *self.range.start() ..= *self.range.start());
                // (self.value & mask) != 0
            }
            #[inline]
            fn set(&self) -> $Type {
                let mask = mask!($Type, self.range);
                self.value | mask
            }
            #[inline]
            fn clr(&self) -> $Type {
                let mask = mask!($Type, self.range);
                self.value & (!mask)
            }
            #[inline]
            fn revert(&self) -> $Type {
                let mask = mask!($Type, self.range);
                self.value ^ mask
            }
            #[inline]
            fn write(&self, value: $Type) -> $Type {
                let mask = mask!($Type, self.range);
                (self.value & (!mask)) | ((value << self.range.start()) & mask)
            }
            #[inline]
            fn read(&self) -> $Type {
                let mask = mask!($Type, self.range);
                (self.value & mask) >> self.range.start()
            }
            #[inline]
            fn is_clr(&self) -> bool {
                self.read() == 0
            }
            #[inline]
            fn is_set(&self) -> bool {
                let mask = mask!($Type, self.range);
                (self.value & mask) == mask
            }
            /// 运行效率和标准库(编译器内部提供的)不相上下。
            #[inline]
            fn count_ones(&self) -> u32 {
                use core::convert::TryInto;
                let mut ret = self.read();
                let mut i = 1;
                while i <= core::mem::size_of::<$Type>() * 4 {
                    let max = !(0 as $Type);
                    let div = (1 << i) + 1;
                    let a:$Type = max / div;
                    let b:$Type = a << i;
                    ret = (a & ret) + ((b & ret) >> i);
                    i = i << 1;
                }
                ret.try_into().unwrap()
            }
        })*
    };
}
impl_bitsops!(u8 u16 u32 u64 u128 usize);

/// 这是一个示例,旨在演示思路
/// 1. 先每两个 bit 为一组计数,并且每一组之间可以并行计算。(利用了加法器的 bit 间的并行性)
/// 2. 合并,得出每 4 个 bit 为一组的计数
/// 3. 再次合并,得出每 8 个 bit 为一组的计数。
/// 4. 如果是单字节,则到此结束,否则以此类推下去。
#[inline]
fn __count_ones_u8(data: u8) -> u32 {
    let x1 = data & 0b0101_0101;
    let x2 = (data & 0b1010_1010) >> 1;

    let y = x1 + x2;
    let y1 = (y & 0b1100_1100) >> 2;
    let y2 = y & 0b0011_0011;

    let z = y1 + y2;
    let z1 = z & 0b0000_1111;
    let z2 = (z & 0b1111_0000) >> 4;

    return (z2 + z1) as u32;
}

/// 这是一个示例,旨在演示思路,实际上在计算 x1 和 x2 时没有并行。
/// 所以利用 u8 来计算 u16 不是一个好的做法,
/// 沿着 __count_ones_u8 的思路才是正道。
#[no_mangle]
fn __count_ones_u16(data: u16) -> u32 {
    let x1 = __count_ones_u8(data.to_ne_bytes()[1]);
    let x2 = __count_ones_u8(data.to_ne_bytes()[0]);
    x1 + x2
}