hashheap 0.2.2 - Docs.rs

//! This module contains a new implementation of the hashed heap structure,
//! [ConstHashHeap]. The main external difference is that the capacity
//! of the structure must be known at compile time.
//! However, a [ConstHashHeap::resize] function is provided to transfer
//! entries to a structure of higher capacity.  There are also many
//! internal changes that should improve performance.  Rust HashMaps are
//! no longer employed anywhere.  Instead, each ConstHashHeap contains
//! two arrays, keys and values.  The keys array contains (Option) entries of
//! the form (key,vi) where vi is the index in the values array that contains
//! the mapped value.  The values array contains entries of the form
//! (value,ki) where ki is the index in the keys array of the corresponding
//! key.  The keys array is treated as a closed hashmap (open addressing)
//! with a linear probing rehash function.  The values array is treated
//! as a binary heap. Swapping values in the values array updates
//! the corresponding information in the keys array using the ki index
//! which it possesses.  The ki index does not change until the entire
//! structure is resized.


#![allow(dead_code)]
#![allow(unused_variables)]
#![allow(non_snake_case)]
#![allow(non_camel_case_types)]
#![allow(unused_parens)]
#![allow(unused_mut)]
#![allow(unused_assignments)]
#![allow(unused_doc_comments)]
#![allow(unused_imports)]
use core::cell::{Ref, RefCell, RefMut};
use core::cmp::Ord;
use core::fmt::{Display,Debug};
use std::collections::hash_map::RandomState;
use core::hash::{BuildHasher, Hash, Hasher};


//global heap calculations
fn left(i:usize) -> usize { 2*i+1 }
fn right(i:usize) -> usize { 2*i+2 }
fn parent(i:usize) -> usize { (i-1)/2 }

fn optcmp<VT:PartialOrd>(a:&Option<(VT,usize)>, b:&Option<(VT,usize)>, neg:bool) -> bool
{
  match (a,b,neg) {
    (Some((av,_)), Some((bv,_)),true) => av < bv,
    (Some((av,_)), Some((bv,_)),false) => bv < av,
    _ => false,
  }
}

/// A version of hashheap map with const capacity.
/// The default capacity of a ConstHashHeap is 1024.  Exact powers of
/// two are recommended for other capacities.  Resizing is recommended
/// when the [ConstHashHeap::load_factor] function returns a value greater 
/// than 0.75.  
#[derive(Clone, Debug)]
pub struct ConstHashHeap<KT,VT, const CAPACITY:usize = 1024>
{
   keys : [Option<(KT,usize)>;CAPACITY],
   vals : [Option<(VT,usize)>;CAPACITY],
   maxhashes : [usize;CAPACITY], // max number of hashes from start
   size : usize,
   autostate: RandomState,
   lessthan : fn(&Option<(VT,usize)>,&Option<(VT,usize)>) -> bool,
}
impl<KT:Hash+Eq, VT:PartialOrd, const CAP:usize> ConstHashHeap<KT,VT,CAP> {

  /// creates a new ConstHashHeap.  The boolean argument distinguishes
  /// maxheap and minheap, true = maxheap.
  pub fn new(maxheap:bool) -> Self {
    ConstHashHeap {
      keys : [const { None }; CAP],
      vals : [const { None }; CAP], //std::array::from_fn(|_|None),
      maxhashes : [0;CAP],
      size : 0,
      autostate : RandomState::new(),
      lessthan : if maxheap{|a,b|optcmp(a,b,true)} else {|a,b|optcmp(a,b,false)},
    }
  }//new

  fn hash(&self,key:&KT) -> usize {
     let mut bs = self.autostate.build_hasher(); //rs.build_hasher();
     key.hash(&mut bs);
     (bs.finish() as usize) % CAP
  }

  fn rehash(h:usize) -> usize { (h+1) % CAP }

  fn borrow_hash(&self, key:&KT, rs:&RandomState) -> usize {
     let mut bs = rs.build_hasher();
     key.hash(&mut bs);
     (bs.finish() as usize) % CAP
  }

  fn swap(&mut self, i:usize, k:usize) {
    self.vals.swap(i,k);
    if let Some((ival,ik)) = &mut self.vals[i] {
         self.keys[*ik].as_mut().map(|pair|pair.1 = i);
    }
    if let Some((kval,kk)) = &mut self.vals[k] {
         self.keys[*kk].as_mut().map(|pair|{pair.1 = k;});
    }    
  }//swap

  fn swapup(&mut self, mut i:usize) -> usize {
    let mut pi = if (i>0) {parent(i)} else {0};
    while (i>0 && (self.lessthan)(&self.vals[pi],&self.vals[i])) {
       self.swap(i,pi);
       i = pi;
       if (i>0) {pi = parent(i)};
    }
    i
  }//swapup

  fn swapdown(&mut self, mut i:usize) -> usize {
    let mut si = Some(0);
    while si.is_some() {
      si = None;
      let lf = left(i);
      let rt = right(i);
      //println!("{i}: left {lf}, right {rt}");
      //println!("test: {}",(self.lessthan)(&self.vals[i],&self.vals[lf]));
      if (lf<self.size && (self.lessthan)(&self.vals[i],&self.vals[lf])) {
        si = Some(lf);
      }
      if(rt<self.size && (self.lessthan)(&self.vals[i],&self.vals[rt])
         && (self.lessthan)(&self.vals[lf],&self.vals[rt])) {
        si = Some(rt);
      }
      if let Some(k) = si {
        self.swap(i,k);
        i = k;
      }
    }//while
    i
  }//swapdown

  fn adjust(&mut self, i:usize, both:bool) -> usize {
    let k = self.swapup(i);
    if k==i && both {self.swapdown(i)} else {k}
  }

  /// The number of key-value pairs stored in the structure
  pub fn size(&self) -> usize {self.size}

  /// Either inserts a new key-value pair into the structure,
  /// or if a duplicate key already exists, change the value
  /// associated with the key.  As in a hashmap, keys must be
  /// unique.  true is returned on successful insertion and
  /// false is returned only if capacity has been reached.
  /// This operation takes O(log n) time.
  pub fn insert(&mut self, key:KT, val:VT) -> bool
  { 
    //if (self.size >= CAP) {return false;}
    let h0 = self.hash(&key);
    let mut h = h0;
    let mut hashes = 1;
    let mut target_index = -1;
    let mut keyfoundloc = None;
    loop {
      match &self.keys[h] {
        Some((k,vi)) if k==&key => {
          keyfoundloc = Some(*vi);
          break;
        },
        Some(_) => { h = Self::rehash(h); hashes+=1; },
        None if hashes < self.maxhashes[h0] => {
          if target_index == -1 { target_index = h as isize; }
          h=Self::rehash(h);
          hashes += 1;
        },
        None => {
          keyfoundloc = Some(self.size);
          break;
        },
      }//match
    }// loop
    match &keyfoundloc {  // reuse slot
      Some(vi) if *vi==self.size && self.size >= CAP => {
        return false;
      }
      Some(vi) if *vi == self.size => {
        self.size+=1;
        if target_index>=0 {h = target_index as usize;}
      },
      _ => {},
    }//match
    if hashes > self.maxhashes[h0] {
      self.maxhashes[h0] = hashes;
    }
    if let Some(vi) = keyfoundloc {
        self.keys[h] = Some((key,vi));
        self.vals[vi] = Some((val,h));
        self.adjust(vi, vi+1<self.size);
    }
    true
  }//set

  /// alias for [Self::insert]
  pub fn push(&mut self, key:KT, val:VT) -> bool {  // alias for insert
     self.insert(key,val)
  }

  /// returns reference to value associated with key, if it exists.
  /// This operation is also called in the implementation of the [core::ops::Index]
  /// trait. This is an O(log n) operation.
  pub fn get(&self, key:&KT) -> Option<&VT> {
    let mut answer = None;
    let h0 = self.hash(&key);
    let mut h = h0;
    let mut hashes = 1;
    loop {
      match &self.keys[h] {
        Some((k,vi)) if k==key => {
          answer = self.vals[*vi].as_ref().map(|p|&p.0);
          break;
        },
        _ if hashes < self.maxhashes[h0] => {
          h=Self::rehash(h);
          hashes += 1;        
        }
        _ => { break; }
      }//match
    }//loop
    answer
  }//get

  /// modifies the entry associated with the key, if it exists, using
  /// the provided closure.  This procedure will adjust the position of
  /// of the entry in the priority heap after modification. Returns true
  /// on successful modification and false if key was not found.
  /// This operation is O(log n) plus the cost of calling the closure.
  pub fn modify<F:FnOnce(&mut VT)>(&mut self, key:&KT, f:F) -> bool {
    let h0 = self.hash(&key);
    let mut h = h0;
    let mut hashes = 1;
    let mut keyfoundloc = None;
    loop {
      match &self.keys[h] {
        Some((k,vi)) if k==key => {
          keyfoundloc = Some(*vi);
          break;
        },
        _ if hashes < self.maxhashes[h0] => {
          h=Self::rehash(h);
          hashes += 1;        
        }
        _ => { break; }
      }//match
    }//loop
    if let Some(vi) = keyfoundloc {
      self.vals[vi].as_mut().map(|p|f(&mut p.0));
      self.adjust(vi, vi+1<self.size);
      true
    }
    else {false}
  }// modify

  /// remove and return the key-value pair associated with the key.
  /// O(log n)
  pub fn remove(&mut self, key:&KT) -> Option<(KT,VT)> {
    let mut answer = None;
    let mut keyfoundloc = None;    
    let h0 = self.hash(&key);
    let mut h = h0;
    let mut hashes = 1;
    loop {
      match &self.keys[h] {
        Some((k,vi)) if k==key => {
          keyfoundloc = Some(*vi);
          break;
        },
        _ if hashes < self.maxhashes[h0] => {
          h=Self::rehash(h);
          hashes += 1;        
        }
        _ => { break; }
      }//match
    }//loop
    if let Some(vi) = keyfoundloc {
       let mut ak = None;
       let mut av = None;
       core::mem::swap(&mut ak, &mut self.keys[h]);
       core::mem::swap(&mut av, &mut self.vals[vi]);
       answer = ak.zip(av).map(|(a,b)|(a.0,b.0));
       // adjust heap;
       if (vi+1 != self.size) {
          self.swap(vi,self.size-1);
          self.adjust(vi,true);
       }
       self.size -= 1; 
    }
    answer
  }//remove

  /// remove and return the highest-priority key-value pair. O(log n).
  pub fn pop(&mut self) -> Option<(KT,VT)> {
    let mut answer = None;
    if self.size < 1 { return answer; }
    if let Some((_,ki)) = &self.vals[0] {
       let mut ak = None;
       let mut av = None;
       core::mem::swap(&mut ak, &mut self.keys[*ki]);
       core::mem::swap(&mut av, &mut self.vals[0]);
       answer = ak.zip(av).map(|(a,b)|(a.0,b.0));
       self.size -= 1;
       if (self.size>0) {
            self.swap(0,self.size);
            self.swapdown(0);
       }
    }
    answer  
  }//pop

  /// returns reference to highest-priority key-value pair without
  /// removal.  This operation is O(1).
  pub fn peek(&self) -> Option<(&KT,&VT)> {
    if self.size < 1 { None }
    else {
      self.vals[0].as_ref().and_then(|vp|
        self.keys[vp.1].as_ref().map(|kp|(&kp.0,&vp.0)))
    }
  }//peek

  /// The load factor is the size divided by the capacity.  Resizing is
  /// recommended when this factor is greater than 0.75.
  pub fn load_factor(&self) -> f32 {
    (self.size as f32) / (CAP as f32)
  }

  /// moves all entries to a ConstHashHeap of a new capacity.
  pub fn resize<const NEWCAP:usize>(mut self) -> ConstHashHeap<KT,VT,NEWCAP> {
    let mut hp2 = ConstHashHeap::new(true);
    hp2.lessthan = self.lessthan;
    hp2.size = self.size;
    for i in 0..self.size {
      let mut h = 0;
      if let Some((_,ki)) = &self.vals[i] {
         self.keys[*ki].as_ref().map(|(key,vi)|{
           let h0 = hp2.borrow_hash(key,&self.autostate);
           h = h0;
           let mut hashes = 1;
           loop {
             match hp2.keys[h] {
               Some(_) => {
                 h = (h+1) % NEWCAP;
                 hashes += 1;
               },
               None => {
                 break;
               },
             }//match
           }//loop
           hp2.maxhashes[h0] = hashes;
         });
         core::mem::swap(&mut hp2.keys[h],&mut self.keys[*ki]);
         self.vals[i].as_mut().map(|p|{p.1 = h;});
      } // if-let
      core::mem::swap(&mut hp2.vals[i], &mut self.vals[i]);      
    }//for
    hp2.autostate = self.autostate;
    hp2
  }//resize

  /// moves all entries to a new ConstHashHeap of the same capacity. This
  /// operation may be called after a large number of key-value pairs
  /// had been removed, which should improve hash lookup performance.
  pub fn refresh(mut self) -> Self {
    self.resize()
  }

  /// returns a non-consuming iterator over all entries in no particular
  /// order.
  pub fn iter<'a>(&'a self) -> CHHIter<'a,KT,VT,CAP> {
    CHHIter {
      chh : self,
      index : 0,
    }
  }//iter

  /// returns a consuming iterator over all entries in order of priority.
  /// This iterator is equivalent to repeatedly calling [Self::pop], and
  /// will empty the structure of all entries.
  pub fn priority_stream<'a>(&'a mut self) -> PriorityStream<'a,KT,VT,CAP> {
    PriorityStream(self)
  }
  
}// main impl

/// indexed get, unwraps
impl<KT: Hash + Eq, VT: PartialOrd, const CAP:usize> core::ops::Index<&KT>
for ConstHashHeap<KT,VT,CAP>
{
    type Output = VT;
    fn index(&self, index: &KT) -> &Self::Output {
        self.get(index).expect("key not found")
    }
} //impl Index

impl<KT:Display+Debug+Hash+Eq, VT:Display+Debug+PartialOrd, const CAP:usize> ConstHashHeap<KT,VT,CAP>
{
  /// For debugging and performance statistics.  The implementation uses a
  /// separate array to keep track of the maximum number of rehash
  /// operations required starting from an original hash index.  This improves
  /// the performance of searching for a key.  The diagnostics procedure 
  /// returns the average number of hash- and rehash operations required
  /// starting from an original hash index.  The smaller the number (closer
  /// to one) the better the performance.  A large average suggests that
  /// [Self::resize] or [Self::refresh] is needed. Note that the procedure
  /// is not a constant-time operation and is in fact O(capacity).
  /// The boolean argument gives the
  /// option of printing the arrays underneath (not recommended).
 pub fn diagnostics(&self, print:bool) -> f32 {

   // compute average number of hashes from maxhashes
   let mut mx = 0;
   let mut hashes = 0;
   for i in 0..CAP {
      if self.maxhashes[i] > 0 {
        mx += 1;
        hashes += self.maxhashes[i];
      }
   }
   let ave_hashes = if mx==0 {0.0} else {(hashes as f32) / (mx as f32)};
   if print {
    println!("---  table ---");
    for i in 0..CAP {
      println!("{i}: {:?}, \t {:?} \t hash {}   maxhs {}",&self.keys[i],&self.vals[i],
       self.keys[i].as_ref().map(|p|self.hash(&p.0).to_string()).unwrap_or(String::new()),self.maxhashes[i]);
    }
    println!("--table size {}, capacity {}, average number of hash/rehashes: {}--", self.size, CAP, ave_hashes);
   }//print
   ave_hashes
  }//diagnostics
}//diagnostics


/////////////////// iterators

/// Iterator for the [ConstHashHeap::iter] function
pub struct CHHIter<'a, KT,VT, const CAP:usize>
{
  chh : &'a ConstHashHeap<KT,VT,CAP>,
  index : usize,
}//CKVIter
impl<'a,KT: Hash + Eq, VT: PartialOrd, const CAP:usize>
Iterator for CHHIter<'a,KT,VT,CAP> {
  type Item = (&'a KT, &'a VT);
  fn next(&mut self) -> Option<Self::Item> {
    let mut answer = None;
    if self.index >= self.chh.size() {return answer;}
    self.index+=1;
    if let Some((val,ki)) = &self.chh.vals[self.index-1] {
      if let Some((key,vi)) = &self.chh.keys[*ki] {
         answer = Some((key,val));
      }
    }
    answer
  }//next
}// CHHIter impl

impl<'a, KT: Hash + Eq, VT: PartialOrd, const CAP:usize> IntoIterator
for &'a ConstHashHeap<KT,VT,CAP>
{
  type Item = (&'a KT, &'a VT);
  type IntoIter = CHHIter<'a,KT,VT,CAP>;
  fn into_iter(self) -> Self::IntoIter {
    self.iter()
  }
}// ref intoiter

/// Iterator for the [ConstHashHeap::priority_stream] function
pub struct PriorityStream<'a,KT,VT,const CAP:usize>(&'a mut ConstHashHeap<KT,VT,CAP>);
impl<'a,KT: Hash + Eq, VT: PartialOrd, const CAP:usize> Iterator
for PriorityStream<'a,KT,VT,CAP>
{
  type Item = (KT,VT);
  fn next(&mut self) -> Option<Self::Item> {
    self.0.pop()
  }
}

impl<'a, KT: Hash + Eq, VT: PartialOrd, const CAP:usize> IntoIterator
for &'a mut ConstHashHeap<KT,VT,CAP>
{
  type Item = (KT,VT);
  type IntoIter = PriorityStream<'a,KT,VT,CAP>;
  fn into_iter(self) -> Self::IntoIter {
    PriorityStream(self)
  }
}// ref intoiter