trait DoubleEndedBitstream {
fn pull(&mut self) -> Option<bool>;
fn push(&mut self, bit: bool) -> Result<(), ()>;
fn pull_bits(&mut self, bits: u8) -> Option<u32> {
let mut accum = 0;
for _ in 0 .. bits {
accum <<= 1;
accum |= self.pull()? as u32;
}
Some(accum)
}
fn push_bits(&mut self, bits: u8, value: u32) -> Result<(), ()> {
let mut i = bits;
while i != 0 {
i -= 1;
self.push(value & (1 << i) != 0)?;
}
Ok(())
}
}
struct BitStreamReadWriter<'a> {
buf: &'a mut [u32],
readidx: usize,
writeidx: usize,
}
impl<'a> BitStreamReadWriter<'a> {
pub fn new(buf: &mut [u32], readidx: usize, writeidx: usize) -> BitStreamReadWriter {
BitStreamReadWriter {
buf,
readidx,
writeidx,
}
}
pub fn get_writeidx(&self) -> usize {
self.writeidx
}
pub fn get_readidx(&self) -> usize {
self.readidx
}
}
impl<'a> DoubleEndedBitstream for BitStreamReadWriter<'a> {
fn pull(&mut self) -> Option<bool> {
if self.readidx + 1 == self.writeidx {
return None;
}
let rv = self.buf[self.readidx / 32] & (1 << (self.readidx % 32)) != 0;
self.readidx += 1;
Some(rv)
}
fn push(&mut self, bit: bool) -> Result<(), ()> {
if self.writeidx + 1 == self.readidx {
return Err(())
}
let mut val = self.buf[self.writeidx / 32];
val &= !(1 << (self.writeidx % 32));
val |= (bit as u32) << (self.writeidx % 32);
self.buf[self.writeidx / 32] = val;
self.writeidx += 1;
Ok(())
}
}
#[derive(Debug, Clone)]
pub struct TinySort {
amount: u32,
maxval: u32,
extra: usize,
boundary: u32,
minrange: u32,
buf: Vec<u32>,
committed: usize,
committed_len: usize,
sort_cap: usize,
sort_pending: usize,
}
impl TinySort {
pub fn new(extra: usize, amount: u32, maxval: u32) -> Option<TinySort> {
if extra == 0 || amount < 2 || maxval < 1 {
return None;
}
let ratio = maxval as f64 / amount as f64;
let required = (amount as f64) * (ratio + 1.0f64).log2() + (maxval as f64) * (1.0 / ratio + 1.0).log2();
let bufsize = ((1.0001 * required) as usize) / 32 + 5 + extra;
let buf = vec![0; bufsize];
let minrange;
let boundary;
if ratio >= 1. {
minrange = ratio.ceil() as u32 + 1;
boundary = 0xFFFF_FFFFu32 / minrange as u32;
} else {
minrange = (1. / ratio).ceil() as u32 + 1;
boundary = 0xFFFF_FFFF - 0xFFFF_FFFFu32 / minrange as u32;
}
let committed = 0;
let committed_len = 1;
let mut rv = TinySort {
amount,
maxval,
extra,
boundary,
minrange,
buf,
committed,
committed_len,
sort_cap: extra,
sort_pending: 0,
};
rv.calc_new_sort_cap();
Some(rv)
}
pub fn used_space(&self) -> usize {
std::mem::size_of::<TinySort>() + 4 * self.buf.len()
}
pub fn insert(&mut self, value: u32) {
let idx = self.buf.len() - self.sort_cap + self.sort_pending;
self.buf[idx] = value;
self.sort_pending += 1;
if self.sort_pending == self.sort_cap {
self.commit();
}
}
fn commit(&mut self) {
let idx = self.buf.len() - self.sort_cap;
let (compressed_buf, mut presort_buf) = self.buf.split_at_mut(idx);
presort_buf = &mut presort_buf[..self.sort_pending];
presort_buf.sort_unstable();
compressed_buf.copy_within(0 .. self.committed_len, idx - self.committed_len);
let mut storage = BitStreamReadWriter::new(compressed_buf, (idx - self.committed_len) * 32, 0);
let mut encoder = ArithmeticCoder::new(self.boundary, self.minrange);
let mut decoder = ArithmeticDecoder::new(self.boundary, self.minrange, &mut storage);
let mut storage_iter = 0 .. self.committed;
let mut new_iter = presort_buf.iter().cloned();
let mut storage_num = storage_iter.next().map(|_| decoder.decode_number(&mut storage));
let mut new_num = new_iter.next();
let mut last_num = 0u32;
loop {
match (storage_num, new_num) {
(None, None) => break,
(None, Some(n)) => {
encoder.encode_number(&mut storage, n - last_num);
last_num = n;
new_num = new_iter.next();
},
(Some(s), Some(n)) if s > n => {
encoder.encode_number(&mut storage, n - last_num);
last_num = n;
new_num = new_iter.next();
},
(Some(s), _) => {
encoder.encode_number(&mut storage, s - last_num);
last_num = s;
storage_num = storage_iter.next().map(|_| decoder.decode_number(&mut storage) + s);
}
}
}
encoder.flush(&mut storage);
self.committed += self.sort_pending;
self.sort_pending = 0;
self.committed_len = (storage.get_writeidx() + 31) / 32;
self.calc_new_sort_cap();
}
fn calc_new_sort_cap(&mut self) {
let mut sort_cap = self.extra;
let ratio = self.maxval as f64 / self.amount as f64;
loop {
let new_sort_cap = sort_cap + self.extra;
let next_committed = self.committed + new_sort_cap;
let theoretical_bits_required = (next_committed as f64) * (ratio + 1.0f64).log2() + (self.maxval as f64) * (1.0 / ratio + 1.0).log2();
let words_required = ((1.0001 * theoretical_bits_required) as usize) / 32 + 5;
if words_required + new_sort_cap > self.buf.len() {
break;
}
sort_cap = new_sort_cap;
}
self.sort_cap = sort_cap;
}
}
impl IntoIterator for TinySort {
type Item = u32;
type IntoIter = TinySortIterator;
fn into_iter(mut self) -> TinySortIterator {
if self.sort_pending != 0 {
self.commit();
}
let mut storage = BitStreamReadWriter::new(&mut self.buf, 0, 0);
let decoder = ArithmeticDecoder::new(self.boundary, self.minrange, &mut storage);
let readidx = storage.get_readidx();
TinySortIterator {
buf: self.buf,
committed: self.committed,
accum: 0,
decoder,
readidx,
}
}
}
#[derive(Debug, Clone)]
pub struct TinySortIterator {
buf: Vec<u32>,
committed: usize,
accum: u32,
decoder: ArithmeticDecoder,
readidx: usize,
}
impl Iterator for TinySortIterator {
type Item = u32;
fn next(&mut self) -> Option<u32> {
if self.committed == 0 {
return None;
}
self.committed -= 1;
let mut storage = BitStreamReadWriter::new(&mut self.buf, self.readidx, 0);
let decoded = self.decoder.decode_number(&mut storage);
self.readidx = storage.get_readidx();
self.accum += decoded;
Some(self.accum)
}
}
#[derive(Debug, Clone)]
struct ArithmeticCoder {
bottom: u32,
range: u64,
boundary: u32,
minrange: u32,
}
impl ArithmeticCoder {
fn new(boundary: u32, minrange: u32) -> ArithmeticCoder {
ArithmeticCoder {
bottom: 0,
range: 0xFFFF_FFFF,
boundary,
minrange
}
}
fn encode<W: DoubleEndedBitstream>(&mut self, bitstream: &mut W, bit: bool) {
let mut a: u32;
let mut b: u32;
if !bit {
a = self.bottom;
b = self.bottom + ((self.range * (self.boundary as u64)) >> 32) as u32;
} else {
a = self.bottom + 1 + ((self.range * (self.boundary as u64)) >> 32) as u32;
b = self.bottom + self.range as u32;
}
while (a ^ b) & 0x8000_0000 == 0 {
bitstream.push(a & 0x8000_0000 != 0).unwrap();
a <<= 1;
b = (b << 1) | 1;
}
if (b - a) <= self.minrange {
if (b - 0x7FFF_FFFF) < (0x8000_0000 - a) {
b = 0x7FFF_FFFF;
} else {
a = 0x8000_0000;
}
while (a ^ b) & 0x8000_0000 == 0 {
bitstream.push(a & 0x8000_0000 != 0).unwrap();
a <<= 1;
b = (b << 1) | 1;
}
}
self.bottom = a;
self.range = (b - a) as u64;
}
fn encode_number<W: DoubleEndedBitstream>(&mut self, bitstream: &mut W, value: u32) {
for _ in 0 .. value {
self.encode(bitstream, true);
}
self.encode(bitstream, false);
}
fn flush<W: DoubleEndedBitstream>(&mut self, bitstream: &mut W) {
bitstream.push_bits(32, self.bottom + (self.range as u32)/ 2).unwrap();
}
}
#[derive(Debug, Clone)]
struct ArithmeticDecoder {
bottom: u32,
range: u64,
boundary: u32,
minrange: u32,
workbuf: u32
}
impl ArithmeticDecoder {
fn new<W: DoubleEndedBitstream>(boundary: u32, minrange: u32, bitstream: &mut W) -> ArithmeticDecoder {
ArithmeticDecoder {
bottom: 0,
range: 0xFFFF_FFFF,
boundary,
minrange,
workbuf: bitstream.pull_bits(32).unwrap(),
}
}
fn decode<W: DoubleEndedBitstream>(&mut self, bitstream: &mut W) -> bool {
let mut a: u32;
let mut b: u32;
a = self.bottom;
b = self.bottom + ((self.range * (self.boundary as u64)) >> 32) as u32;
let rv = self.workbuf > b;
if rv {
a = b + 1;
b = self.bottom + self.range as u32;
}
while (a ^ b) & 0x8000_0000 == 0 {
a <<= 1;
b = (b << 1) | 1;
self.workbuf = (self.workbuf << 1) | (bitstream.pull().unwrap() as u32);
}
if (b - a) <= self.minrange {
if (b - 0x7FFF_FFFF) < (0x8000_0000 - a) {
b = 0x7FFF_FFFF;
} else {
a = 0x8000_0000;
}
while (a ^ b) & 0x8000_0000 == 0 {
a <<= 1;
b = (b << 1) | 1;
self.workbuf = (self.workbuf << 1) | (bitstream.pull().unwrap() as u32);
}
}
self.bottom = a;
self.range = (b - a) as u64;
rv
}
fn decode_number<W: DoubleEndedBitstream>(&mut self, bitstream: &mut W) -> u32 {
let mut i = 0;
while self.decode(bitstream) {
i += 1;
}
i
}
}
#[cfg(test)]
mod test {
#![allow(dead_code)]
use crate::{DoubleEndedBitstream, ArithmeticCoder, ArithmeticDecoder, TinySort};
struct CircularBitBuffer {
buf: Vec<u32>,
head: usize,
tail: usize
}
impl CircularBitBuffer {
pub fn new(buf: usize) -> CircularBitBuffer {
CircularBitBuffer {
buf: vec![0; buf],
head: 0,
tail: 0
}
}
pub fn used_space(&self) -> usize {
std::mem::size_of::<CircularBitBuffer>() + (self.len() + 3) / 8
}
pub fn len(&self) -> usize {
let mut head = self.head;
if head < self.tail {
head += self.buf.len() * 32;
}
head - self.tail
}
}
impl DoubleEndedBitstream for CircularBitBuffer {
fn pull(&mut self) -> Option<bool> {
if self.tail == self.head {
return None;
}
let rv = self.buf[self.tail / 32] & (1 << (self.tail % 32)) != 0;
self.tail += 1;
if self.tail == self.buf.len() * 32 {
self.tail = 0;
}
Some(rv)
}
fn push(&mut self, bit: bool) -> Result<(), ()> {
let mut head = self.head + 1;
if head == self.buf.len() * 32 {
head = 0;
}
if head == self.tail {
return Err(())
}
let mut val = self.buf[self.head / 32];
val &= !(1 << (self.head % 32));
val |= (bit as u32) << (self.head % 32);
self.buf[self.head / 32] = val;
self.head = head;
Ok(())
}
}
pub fn generate_random_values(amount: usize, max: u32) -> Vec<u32> {
use rand::prelude::*;
let mut buf = Vec::new();
let mut rng = thread_rng();
for _ in 0 .. amount {
buf.push(rng.gen_range(0, max));
}
buf
}
#[test]
pub fn test_classic_example() {
let mut sort = TinySort::new(8_000, 1_000_000, 100_000_000).unwrap();
let mut values = generate_random_values(1_000_000, 100_000_000);
for value in values.iter().cloned() {
sort.insert(value);
}
println!("used {}", sort.used_space());
let sorted: Vec<u32> = sort.into_iter().collect();
values.sort();
assert!(values == sorted);
}
#[test]
pub fn test_arithmetic_coding() {
use rand::prelude::*;
use rand::rngs::StdRng;
let mut buf = CircularBitBuffer::new(1_000_000);
let mut deque = std::collections::VecDeque::new();
let mut rng = StdRng::seed_from_u64(5);
let mut encoder = ArithmeticCoder::new(0x288df0c, 101);
let mut i = 0;
for _ in 0 .. 100 {
let r = rng.gen_range(0, 200);
encoder.encode_number(&mut buf, r);
deque.push_front(r);
i += 1;
}
let mut decoder = ArithmeticDecoder::new(0x288df0c, 101, &mut buf);
for _ in 0 .. 100_000_000 {
let r = rng.gen_range(0, 200);
encoder.encode_number(&mut buf, r);
deque.push_front(r);
let val = decoder.decode_number(&mut buf);
let check = deque.pop_back().unwrap();
if check != val {
panic!("val = {}, check = {}, i = {}", val, check, i);
}
i += 1;
}
}
}