#![feature(portable_simd)]
mod impls;
mod simd;
use std::{array::from_fn, cmp::Reverse, iter::repeat_n};
#[cfg(not(feature = "u64"))]
pub const T_U32: bool = true;
#[cfg(not(feature = "u64"))]
pub type T = u32;
#[cfg(not(feature = "u64"))]
const L: usize = 8;
#[cfg(not(feature = "u64"))]
type S = std::simd::u32x8;
#[cfg(feature = "u64")]
pub const T_U32: bool = false;
#[cfg(feature = "u64")]
pub type T = u64;
#[cfg(feature = "u64")]
const L: usize = 4;
#[cfg(feature = "u64")]
type S = std::simd::u64x4;
pub trait Heap {
fn default() -> Self;
fn push(&mut self, t: T);
fn pop(&mut self) -> Option<T>;
}
pub struct QuickHeap<const N: usize, const M: usize> {
layer: usize,
pivots: Vec<T>,
buckets: Vec<Vec<T>>,
}
impl<const N: usize, const M: usize> Heap for QuickHeap<N, M> {
fn default() -> Self {
let mut pivots = vec![0; 128];
pivots[0] = T::MAX;
Self {
layer: 0,
pivots,
buckets: vec![vec![]; 128],
}
}
#[inline(never)]
fn push(&mut self, t: T) {
let target_layer = simd::push_position(&self.pivots, self.layer, t);
self.buckets[target_layer].reserve(L + 1);
self.buckets[target_layer].push(t);
}
#[inline(never)]
fn pop(&mut self) -> Option<T> {
if self.buckets[self.layer].len() == 0 {
return None;
}
while self.buckets[self.layer].len() > N {
self.partition();
}
let layer = &mut self.buckets[self.layer];
let min_pos = simd::position_min(layer);
let min = layer.swap_remove(min_pos);
if layer.is_empty() && self.layer > 0 {
self.pivots[self.layer] = 0;
self.layer -= 1;
}
Some(min)
}
}
impl<const N: usize, const M: usize> QuickHeap<N, M> {
#[inline(never)]
fn partition(&mut self) {
if self.layer + 2 == self.pivots.len() {
self.pivots.extend(repeat_n(0, L));
self.buckets.extend(repeat_n(vec![], L));
}
let [cur_layer, next_layer] = &mut self.buckets[self.layer..=self.layer + 1] else {
unreachable!()
};
let n = cur_layer.len();
let mut pivots: [(T, usize); M] = from_fn(|_| {
let pos = rand::random_range(0..n);
(cur_layer[pos], pos)
});
pivots.sort();
let pivot = pivots[M / 2].0;
let pivot_pos = pivots[M / 2].1;
self.pivots[self.layer + 1] = pivot + 1;
cur_layer.reserve(L);
next_layer.clear();
next_layer.reserve(n + L);
unsafe { cur_layer.set_len(n + L) };
unsafe { next_layer.set_len(n + L) };
let mut cur_len = 0;
let mut next_len = 0;
let half = (pivot_pos + 1).next_multiple_of(L);
for i in (0..half).step_by(L) {
let vals: [T; L] = cur_layer[i..i + L].try_into().unwrap();
simd::partition(
S::from_array(vals),
n - i,
pivot + 1,
cur_layer,
&mut cur_len,
next_layer,
&mut next_len,
);
}
for i in (half..n).step_by(L) {
let vals: [T; L] = cur_layer[i..i + L].try_into().unwrap();
simd::partition(
S::from_array(vals),
n - i,
pivot,
cur_layer,
&mut cur_len,
next_layer,
&mut next_len,
);
}
debug_assert!(next_len > 0);
unsafe {
cur_layer.set_len(cur_len);
next_layer.set_len(next_len);
}
if cur_len == 0 {
std::mem::swap(cur_layer, next_layer);
return;
}
self.layer += 1;
}
}