use iterators::coalesce::Coalesce;
use std::fmt::Debug;
use timely_sort::Unsigned;
#[derive(Debug)]
pub struct Compact<K, V> {
pub keys: Vec<K>,
pub cnts: Vec<u32>,
pub vals: Vec<(V, i32)>,
}
impl<K: Ord+Debug, V: Ord> Compact<K, V> {
pub fn new(k: usize, v: usize) -> Compact<K, V> {
Compact {
keys: Vec::with_capacity(k),
cnts: Vec::with_capacity(k),
vals: Vec::with_capacity(v),
}
}
pub fn size(&self) -> usize {
self.keys.len() * ::std::mem::size_of::<K>() +
self.cnts.len() * 4 +
self.vals.len() * ::std::mem::size_of::<(V,i32)>()
}
pub fn extend<I: Iterator<Item=((K, V), i32)>>(&mut self, mut iterator: I) {
if let Some(((mut old_key, val), wgt)) = iterator.next() {
let mut key_cnt = 1;
self.vals.push((val, wgt));
for ((key, val), wgt) in iterator {
self.vals.push((val,wgt));
if old_key != key {
self.keys.push(old_key);
self.cnts.push(key_cnt);
old_key = key;
key_cnt = 0;
}
key_cnt += 1;
}
self.keys.push(old_key);
self.cnts.push(key_cnt);
}
}
pub fn extend_by(&mut self, buffer: &mut Vec<((K, V), i32)>) {
let mut cursor = 0;
for index in 1 .. buffer.len() {
if buffer[cursor].0 == buffer[index].0 {
buffer[cursor].1 += buffer[index].1;
}
else {
if buffer[cursor].1 != 0 {
cursor += 1;
}
buffer.swap(cursor, index);
}
}
if buffer[cursor].1 != 0 {
cursor += 1;
}
buffer.truncate(cursor);
let mut iter = buffer.drain(..);
if let Some(((key1,val1),wgt1)) = iter.next() {
let mut prev_len = self.vals.len();
self.keys.push(key1);
self.vals.push((val1, wgt1));
for ((key, val), wgt) in iter {
if self.keys[self.keys.len() - 1] != key {
self.keys.push(key);
self.cnts.push((self.vals.len() - prev_len) as u32);
prev_len = self.vals.len();
}
self.vals.push((val,wgt));
}
self.cnts.push((self.vals.len() - prev_len) as u32);
}
}
pub fn from_radix<U: Unsigned+Default, F: Fn(&K)->U>(source: &mut Vec<Vec<((K,V),i32)>>, function: &F) -> Option<Compact<K,V>> {
let mut size = 0;
for list in source.iter() {
size += list.len();
}
let mut result = Compact::new(size,size);
let mut buffer = vec![];
let mut current = Default::default();
for ((key, val), wgt) in source.iter_mut().flat_map(|x| x.drain(..)) {
let hash = function(&key);
if buffer.len() > 0 && hash != current {
buffer.sort_by(|x: &((K,V),i32),y: &((K,V),i32)| x.0.cmp(&y.0));
result.extend_by(&mut buffer);
}
buffer.push(((key,val),wgt));
current = hash;
}
if buffer.len() > 0 {
buffer.sort_by(|x: &((K,V),i32),y: &((K,V),i32)| x.0.cmp(&y.0));
result.extend(buffer.drain(..).coalesce());
}
if result.vals.len() > 0 {
result.keys.shrink_to_fit();
result.cnts.shrink_to_fit();
result.vals.shrink_to_fit();
Some(result)
}
else {
None
}
}
pub fn session<'a>(&'a mut self) -> CompactSession<'a, K, V> {
CompactSession::new(self)
}
pub fn push<I: Iterator<Item=(V, i32)>>(&mut self, key: K, iterator: I) {
let mut session = self.session();
for (val, wgt) in iterator {
session.push(val, wgt);
}
session.done(key);
}
}
pub struct CompactSession<'a, K: 'a, V: 'a> {
compact: &'a mut Compact<K, V>,
len: usize,
}
impl<'a, K: 'a, V: 'a> CompactSession<'a, K, V> {
pub fn new(compact: &'a mut Compact<K, V>) -> CompactSession<'a, K, V> {
let len = compact.vals.len();
CompactSession {
compact: compact,
len: len,
}
}
#[inline]
pub fn push(&mut self, val: V, wgt: i32) {
self.compact.vals.push((val,wgt));
}
pub fn done(self, key: K) {
if self.compact.vals.len() > self.len {
self.compact.keys.push(key);
self.compact.cnts.push((self.compact.vals.len() - self.len) as u32);
}
}
}