1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
//! Parallel bits deposit (`pdep`): the `Pdep` trait and its integer implementations.
/// Parallel bits deposit.
///
/// Provides the [`pdep`](Pdep::pdep) operation, which scatters the low-order
/// bits of a value to the bit positions selected by a mask.
pub trait Pdep {
/// Parallel bits deposit.
///
/// Scatters the contiguous low-order bits of `self` into the result at the
/// positions of the set bits of `mask`: bit 0 of `self` is written to the
/// position of the lowest set bit of `mask`, bit 1 of `self` to the position
/// of the next-lowest set bit of `mask`, and so on.
///
/// All other bits of the result (bits not set in `mask`) are zero. Bits of
/// `self` above the number of set bits in `mask` do not affect the result.
///
/// **Keywords**: Parallel bits deposit, scatter bits.
///
/// # Instructions
///
/// - [`PDEP`](http://www.felixcloutier.com/x86/PDEP.html):
/// - Description: Parallel bits deposit.
/// - Architecture: x86.
/// - Instruction set: BMI2.
/// - Registers: 32/64 bit.
///
/// The instruction is only used by the implementations in this module when
/// the crate is compiled for x86/x86_64 with the `bmi2` target feature
/// enabled; otherwise a portable bit-by-bit loop computes the same result.
///
/// # Example
///
/// ```
/// # use bitintr::*;
/// let n = 0b1011_1110_1001_0011u16;
///
/// let m0 = 0b0110_0011_1000_0101u16;
/// let s0 = 0b0000_0010_0000_0101u16;
///
/// let m1 = 0b1110_1011_1110_1111u16;
/// let s1 = 0b1110_1001_0010_0011u16;
///
/// assert_eq!(n.pdep(m0), s0);
/// assert_eq!(n.pdep(m1), s1);
/// ```
fn pdep(self, mask: Self) -> Self;
}
// Expands to a function `pdep_(value, mask)` implementing parallel bits
// deposit for the integer type `$ty`.
//
// - `pdep_impl!(T)` generates a portable, safe, bit-by-bit implementation.
// - `pdep_impl!(T, intrinsic)` generates an `unsafe fn` that calls
//   `crate::arch::intrinsic` when compiling for x86/x86_64 with the `bmi2`
//   target feature statically enabled, and falls back to the portable
//   implementation otherwise.
macro_rules! pdep_impl {
    ($ty:ty) => {
        #[inline]
        fn pdep_(value: $ty, mut mask: $ty) -> $ty {
            let mut res: $ty = 0;
            // Single-bit probe that walks over the bits of `value`, starting
            // at bit 0 and moving up by one position per consumed mask bit.
            let mut bb: $ty = 1;
            while mask != 0 {
                if (value & bb) != 0 {
                    // `mask & -mask` isolates the lowest set bit of `mask`,
                    // which is the destination of the current source bit.
                    res |= mask & mask.wrapping_neg();
                }
                // Clear the lowest set bit of `mask`. `wrapping_sub` is
                // required because a signed `mask` equal to `MIN` would
                // otherwise overflow on `mask - 1`.
                mask &= mask.wrapping_sub(1);
                // Advance the probe to the next source bit. `wrapping_add`
                // is required because the probe is shifted out of the type
                // (or into the sign bit) once every value bit has been
                // consumed, e.g. when `mask` has all bits set.
                bb = bb.wrapping_add(bb);
            }
            res
        }
    };
    ($ty:ty, $intr:ident) => {
        cfg_if! {
            if #[cfg(all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "bmi2"
            ))] {
                // Hardware path: only compiled when `bmi2` is statically
                // enabled for the target. The `as _` casts convert between
                // `$ty` and the integer type used by the intrinsic.
                #[inline]
                #[target_feature(enable = "bmi2")]
                unsafe fn pdep_(value: $ty, mask: $ty) -> $ty {
                    crate::arch::$intr(
                        value as _,
                        mask as _,
                    ) as _
                }
            } else {
                // No statically-enabled BMI2: use the portable loop.
                pdep_impl!($ty);
            }
        }
    };
}
// Implements `Pdep` for the integer type named by the first argument.
//
// Any further identifiers are passed through unchanged to `pdep_impl!`,
// which decides which `pdep_` helper gets generated.
macro_rules! impl_pdep {
    ($int:ident $(,$rest:ident)*) => {
        impl Pdep for $int {
            // `pdep_` may expand to a safe function, in which case the
            // `unsafe` block below is redundant; silence that warning.
            #[allow(unused_unsafe)]
            #[inline]
            fn pdep(self, mask: Self) -> Self {
                // Brings a local `pdep_` helper for `$int` into scope.
                pdep_impl!($int $(,$rest)*);
                // SAFETY: the only `unsafe` variant of `pdep_` is the
                // `#[target_feature]` function, and that one is generated
                // solely when the feature is statically enabled at compile
                // time, so calling it here is always sound.
                unsafe { pdep_(self, mask) }
            }
        }
    };
}
// 8- and 16-bit integers: no intrinsic name is supplied, so these types use
// the single-argument form of `pdep_impl!`, i.e. the portable loop.
// NOTE(review): `impl_all!` is defined outside this file section; presumably
// it invokes `impl_pdep!` once per listed type -- confirm.
impl_all!(impl_pdep: u8, u16, i8, i16);
// 32- and 64-bit integers: pass an intrinsic name wherever the target
// architecture could provide one.
cfg_if! {
if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
// Both x86 flavours get the 32-bit intrinsic; the signed type reuses the
// unsigned intrinsic (the generated code casts with `as _`).
impl_pdep!(u32, _pdep_u32);
impl_pdep!(i32, _pdep_u32);
cfg_if! {
if #[cfg(target_arch = "x86_64")] {
// The 64-bit intrinsic is only requested on x86_64.
impl_pdep!(u64, _pdep_u64);
impl_pdep!(i64, _pdep_u64);
} else {
// 32-bit x86: 64-bit integers use the portable loop.
impl_all!(impl_pdep: i64, u64);
}
}
} else {
// Non-x86 targets: every remaining width uses the portable loop.
impl_all!(impl_pdep: u32, i32, i64, u64);
}
}