1use num_traits::{AsPrimitive, PrimInt, Unsigned};
4use std::collections::{HashMap, HashSet};
5use std::ops::Shr;
6
7#[allow(non_snake_case)]
8pub fn prefetch_read_NTA<T>(data: &[T], offset: usize) {
9 let _p = data.as_ptr().wrapping_add(offset) as *const i8;
10
11 #[cfg(all(feature = "prefetch", any(target_arch = "x86", target_arch = "x86_64")))]
12 {
13 #[cfg(target_arch = "x86")]
14 use std::arch::x86::{_mm_prefetch, _MM_HINT_NTA};
15
16 #[cfg(target_arch = "x86_64")]
17 use std::arch::x86_64::{_mm_prefetch, _MM_HINT_NTA};
18
19 unsafe {
20 _mm_prefetch(_p, _MM_HINT_NTA);
21 }
22 }
23
24 #[cfg(all(feature = "prefetch", target_arch = "aarch64"))]
25 {
26 use core::arch::aarch64::{_prefetch, _PREFETCH_LOCALITY0, _PREFETCH_READ};
27
28 unsafe {
29 _prefetch(_p, _PREFETCH_READ, _PREFETCH_LOCALITY0);
30 }
31 }
32}
33
34const K_SELECT_IN_BYTE: [u8; 2048] = [
36 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
37 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
38 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
39 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
40 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
41 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
42 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
43 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
44 8, 8, 8, 1, 8, 2, 2, 1, 8, 3, 3, 1, 3, 2, 2, 1, 8, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
45 8, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
46 8, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1, 6, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
47 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
48 8, 7, 7, 1, 7, 2, 2, 1, 7, 3, 3, 1, 3, 2, 2, 1, 7, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
49 7, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
50 7, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1, 6, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
51 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
52 8, 8, 8, 8, 8, 8, 8, 2, 8, 8, 8, 3, 8, 3, 3, 2, 8, 8, 8, 4, 8, 4, 4, 2, 8, 4, 4, 3, 4, 3, 3, 2,
53 8, 8, 8, 5, 8, 5, 5, 2, 8, 5, 5, 3, 5, 3, 3, 2, 8, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2,
54 8, 8, 8, 6, 8, 6, 6, 2, 8, 6, 6, 3, 6, 3, 3, 2, 8, 6, 6, 4, 6, 4, 4, 2, 6, 4, 4, 3, 4, 3, 3, 2,
55 8, 6, 6, 5, 6, 5, 5, 2, 6, 5, 5, 3, 5, 3, 3, 2, 6, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2,
56 8, 8, 8, 7, 8, 7, 7, 2, 8, 7, 7, 3, 7, 3, 3, 2, 8, 7, 7, 4, 7, 4, 4, 2, 7, 4, 4, 3, 4, 3, 3, 2,
57 8, 7, 7, 5, 7, 5, 5, 2, 7, 5, 5, 3, 5, 3, 3, 2, 7, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2,
58 8, 7, 7, 6, 7, 6, 6, 2, 7, 6, 6, 3, 6, 3, 3, 2, 7, 6, 6, 4, 6, 4, 4, 2, 6, 4, 4, 3, 4, 3, 3, 2,
59 7, 6, 6, 5, 6, 5, 5, 2, 6, 5, 5, 3, 5, 3, 3, 2, 6, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2,
60 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 3, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 4, 8, 4, 4, 3,
61 8, 8, 8, 8, 8, 8, 8, 5, 8, 8, 8, 5, 8, 5, 5, 3, 8, 8, 8, 5, 8, 5, 5, 4, 8, 5, 5, 4, 5, 4, 4, 3,
62 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 6, 8, 6, 6, 3, 8, 8, 8, 6, 8, 6, 6, 4, 8, 6, 6, 4, 6, 4, 4, 3,
63 8, 8, 8, 6, 8, 6, 6, 5, 8, 6, 6, 5, 6, 5, 5, 3, 8, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3,
64 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 3, 8, 8, 8, 7, 8, 7, 7, 4, 8, 7, 7, 4, 7, 4, 4, 3,
65 8, 8, 8, 7, 8, 7, 7, 5, 8, 7, 7, 5, 7, 5, 5, 3, 8, 7, 7, 5, 7, 5, 5, 4, 7, 5, 5, 4, 5, 4, 4, 3,
66 8, 8, 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 6, 3, 8, 7, 7, 6, 7, 6, 6, 4, 7, 6, 6, 4, 6, 4, 4, 3,
67 8, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 3, 7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3,
68 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4,
69 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 8, 8, 8, 8, 8, 8, 8, 5, 8, 8, 8, 5, 8, 5, 5, 4,
70 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 6, 8, 6, 6, 4,
71 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 6, 8, 6, 6, 5, 8, 8, 8, 6, 8, 6, 6, 5, 8, 6, 6, 5, 6, 5, 5, 4,
72 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 4,
73 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 5, 8, 8, 8, 7, 8, 7, 7, 5, 8, 7, 7, 5, 7, 5, 5, 4,
74 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 6, 8, 8, 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 6, 4,
75 8, 8, 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 6, 5, 8, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 4,
76 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
77 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5,
78 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6,
79 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 6, 8, 6, 6, 5,
80 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7,
81 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 5,
82 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 6,
83 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 6, 8, 8, 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 6, 5,
84 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
85 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
86 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
87 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6,
88 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
89 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7,
90 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7,
91 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 6,
92 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
93 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
94 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
95 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
96 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
97 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
98 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
99 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7,
100];
101
102#[inline(always)]
135pub fn select_in_word(word: u64, k: u64) -> u32 {
136 let k_ones_step4 = 0x1111111111111111_u64;
140 let k_ones_step8 = 0x0101010101010101_u64;
141 let k_lambdas_step8 = 0x8080808080808080_u64;
142
143 let mut s = word;
144 s = s - ((s & (0xA * k_ones_step4)) >> 1);
145 s = (s & (0x3 * k_ones_step4)) + ((s >> 2) & (0x3 * k_ones_step4));
146 s = (s + (s >> 4)) & (0xF * k_ones_step8);
147 let byte_sums = s.wrapping_mul(k_ones_step8);
148 let k_step8 = k * k_ones_step8;
150
151 let geq_k_step8 = ((k_step8 | k_lambdas_step8) - byte_sums) & k_lambdas_step8;
153
154 let place = geq_k_step8.count_ones() * 8;
155
156 if place == 64 {
157 return 64;
158 }
159 let byte_rank = k - (((byte_sums << 8) >> place) & 0xFF_u64);
160
161 place + K_SELECT_IN_BYTE[(((word >> place) & 0xFF_u64) | (byte_rank << 8)) as usize] as u32
162}
163
164#[inline(always)]
165pub fn select_in_word_u128(word: u128, k: u64) -> u32 {
166 let first = word as u64;
167
168 let kp = first.count_ones();
169 if kp as u64 > k {
170 select_in_word(first, k)
171 } else {
172 64 + select_in_word((word >> 64) as u64, k - kp as u64)
173 }
174}
175
176#[inline(always)]
178pub fn popcnt_wide<const N: usize>(data: &[u64]) -> usize {
179 let mut res: usize = 0;
180 for &word in data.iter().take(N) {
181 res += word.count_ones() as usize;
182 }
183 res
184}
185
186use std::mem;
187
188use crate::quadwt::huffqwt::PrefixCode;
189
190#[repr(C, align(64))]
191struct AlignToSixtyFour([u8; 64]);
192
193pub unsafe fn get_64byte_aligned_vector<T>(capacity: usize) -> Vec<T> {
201 assert!(mem::size_of::<T>() <= mem::size_of::<AlignToSixtyFour>());
202 assert!(mem::size_of::<AlignToSixtyFour>().is_multiple_of(mem::size_of::<T>())); let n_units = (capacity * mem::size_of::<T>()).div_ceil(mem::size_of::<AlignToSixtyFour>());
205 let mut aligned: Vec<AlignToSixtyFour> = Vec::with_capacity(n_units);
206
207 let ptr = aligned.as_mut_ptr();
208 let len_units = aligned.len();
209 let cap_units = aligned.capacity();
210
211 mem::forget(aligned);
212
213 Vec::from_raw_parts(
214 ptr as *mut T,
215 len_units * mem::size_of::<AlignToSixtyFour>() / mem::size_of::<T>(),
216 cap_units * mem::size_of::<AlignToSixtyFour>() / mem::size_of::<T>(),
217 )
218}
219
220pub fn msb<T>(v: T) -> u32
222where
223 T: PrimInt,
224{
225 if v == T::zero() {
226 return 0;
227 }
228 (std::mem::size_of::<T>() * 8 - 1) as u32 - v.leading_zeros()
229}
230
231pub fn text_remap(input: &mut [u8]) -> usize {
235 let mut unique: Vec<u8> = input
236 .iter_mut()
237 .map(|c| *c)
238 .collect::<HashSet<_>>()
239 .into_iter()
240 .collect();
241 unique.sort();
242 let remap = unique
243 .into_iter()
244 .enumerate()
245 .map(|(i, c)| (c, i))
246 .collect::<HashMap<_, _>>();
247
248 for c in input.iter_mut() {
249 *c = *remap.get(c).unwrap() as u8;
250 }
251
252 remap.len()
253}
254
255pub fn stable_partition_of_4<T>(sequence: &mut [T], shift: usize)
259where
260 T: Unsigned + PrimInt + Ord + Shr<usize> + AsPrimitive<usize>,
261 usize: AsPrimitive<T>,
262{
263 let mut vecs = [Vec::new(), Vec::new(), Vec::new(), Vec::new()];
264
265 for &a in sequence.iter() {
266 let two_bits: usize = (a.as_() >> shift) & 3;
267 vecs[two_bits].push(a);
268 }
269
270 let mut pos = 0;
271 for i in 0..4 {
272 sequence[pos..pos + vecs[i].len()].copy_from_slice(&(vecs[i][..]));
273 pos += vecs[i].len()
274 }
275}
276
277pub fn stable_partition_of_4_with_codes<T>(sequence: &mut [T], shift: usize, codes: &[PrefixCode])
278where
279 T: Unsigned + PrimInt + Ord + Shr<usize> + AsPrimitive<usize>,
280 usize: AsPrimitive<T>,
281{
282 let mut vecs = [Vec::new(), Vec::new(), Vec::new(), Vec::new(), Vec::new()]; for &a in sequence.iter() {
285 let code = &codes[a.as_()];
286 if code.len <= shift as u32 {
287 vecs[4].push(a);
289 } else {
290 let two_bits = (code.content >> (code.len - shift as u32)) & 3;
292 vecs[two_bits as usize].push(a);
293 }
294 }
295
296 let mut pos = 0;
297 for i in 0..5 {
298 sequence[pos..pos + vecs[i].len()].copy_from_slice(&(vecs[i][..]));
299 pos += vecs[i].len()
300 }
301}
302
303pub fn stable_partition_of_2_with_codes<T>(sequence: &mut [T], shift: usize, codes: &[PrefixCode])
304where
305 T: Unsigned + PrimInt + Ord + Shr<usize> + AsPrimitive<usize>,
306 usize: AsPrimitive<T>,
307{
308 let mut vecs = [Vec::new(), Vec::new(), Vec::new()];
309
310 for &a in sequence.iter() {
311 let code = &codes[a.as_()];
312 if code.len <= shift as u32 {
313 vecs[2].push(a);
315 } else {
316 let bit = (code.content >> (code.len - shift as u32)) & 1;
317 vecs[bit as usize].push(a);
318 }
319 }
320
321 let mut pos = 0;
322 for i in 0..3 {
323 sequence[pos..pos + vecs[i].len()].copy_from_slice(&(vecs[i][..]));
324 pos += vecs[i].len()
325 }
326}
327
328pub fn stable_partition_of_2<T>(sequence: &mut [T], shift: usize)
329where
330 T: Unsigned + PrimInt + Ord + Shr<usize> + AsPrimitive<usize>,
331 usize: AsPrimitive<T>,
332{
333 let mut vecs = [Vec::new(), Vec::new()];
334
335 for &a in sequence.iter() {
336 let bit = (a >> shift).as_() & 1;
337 vecs[bit].push(a);
338 }
339
340 let mut pos = 0;
341 for i in 0..2 {
342 sequence[pos..pos + vecs[i].len()].copy_from_slice(&(vecs[i][..]));
343 pos += vecs[i].len()
344 }
345}
346
347#[cfg(test)]
348mod tests;