jenkins_hash 0.2.0

Native rust implementation of the hash algorithms from Bob Jenkins.
Documentation
use std::num::Wrapping;

const START: u32 = 0x9e3779b9;

/// # Lookup2
/// see: <https://www.burtleburtle.net/bob/hash/doobs.html>
///
///
/// **Do NOT use for cryptographic purposes.**
///
/// ## Example
/// ```rust
#[doc = include_str!("../examples/basic.rs")]
/// ```
///
/// ## Reference C implemenatiom
/// ```c
/// typedef  unsigned long  int  ub4;   /* unsigned 4-byte quantities */
/// typedef  unsigned       char ub1;   /* unsigned 1-byte quantities */
///
/// #define hashsize(n) ((ub4)1<<(n))
/// #define hashmask(n) (hashsize(n)-1)
///
/// /*
/// --------------------------------------------------------------------
/// mix -- mix 3 32-bit values reversibly.
/// For every delta with one or two bits set, and the deltas of all three
///   high bits or all three low bits, whether the original value of a,b,c
///   is almost all zero or is uniformly distributed,
/// * If mix() is run forward or backward, at least 32 bits in a,b,c
///   have at least 1/4 probability of changing.
/// * If mix() is run forward, every bit of c will change between 1/3 and
///   2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
/// mix() was built out of 36 single-cycle latency instructions in a
///   structure that could supported 2x parallelism, like so:
///       a -= b;
///       a -= c; x = (c>>13);
///       b -= c; a ^= x;
///       b -= a; x = (a<<8);
///       c -= a; b ^= x;
///       c -= b; x = (b>>13);
///       ...
///   Unfortunately, superscalar Pentiums and Sparcs can't take advantage
///   of that parallelism.  They've also turned some of those single-cycle
///   latency instructions into multi-cycle latency instructions.  Still,
///   this is the fastest good hash I could find.  There were about 2^^68
///   to choose from.  I only looked at a billion or so.
/// --------------------------------------------------------------------
/// */
/// #define mix(a,b,c) \
/// { \
///   a -= b; a -= c; a ^= (c>>13); \
///   b -= c; b -= a; b ^= (a<<8); \
///   c -= a; c -= b; c ^= (b>>13); \
///   a -= b; a -= c; a ^= (c>>12);  \
///   b -= c; b -= a; b ^= (a<<16); \
///   c -= a; c -= b; c ^= (b>>5); \
///   a -= b; a -= c; a ^= (c>>3);  \
///   b -= c; b -= a; b ^= (a<<10); \
///   c -= a; c -= b; c ^= (b>>15); \
/// }
///
/// /*
/// --------------------------------------------------------------------
/// hash() -- hash a variable-length key into a 32-bit value
///   k       : the key (the unaligned variable-length array of bytes)
///   len     : the length of the key, counting by bytes
///   initval : can be any 4-byte value
/// Returns a 32-bit value.  Every bit of the key affects every bit of
/// the return value.  Every 1-bit and 2-bit delta achieves avalanche.
/// About 6*len+35 instructions.
///
/// The best hash table sizes are powers of 2.  There is no need to do
/// mod a prime (mod is sooo slow!).  If you need less than 32 bits,
/// use a bitmask.  For example, if you need only 10 bits, do
///   h = (h & hashmask(10));
/// In which case, the hash table should have hashsize(10) elements.
///
/// If you are hashing n strings (ub1 **)k, do it like this:
///   for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
///
/// By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
/// code any way you wish, private, educational, or commercial.  It's free.
///
/// See http://burtleburtle.net/bob/hash/evahash.html
/// Use for hash table lookup, or anything where one collision in 2^^32 is
/// acceptable.  Do NOT use for cryptographic purposes.
/// --------------------------------------------------------------------
/// */
///
/// ub4 hash( k, length, initval)
/// register ub1 *k;        /* the key */
/// register ub4  length;   /* the length of the key */
/// register ub4  initval;  /* the previous hash, or an arbitrary value */
/// {
///    register ub4 a,b,c,len;
///
///    /* Set up the internal state */
///    len = length;
///    a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
///    c = initval;         /* the previous hash value */
///
///    /*---------------------------------------- handle most of the key */
///    while (len >= 12)
///    {
///       a += (k[0] +((ub4)k[1]<<8) +((ub4)k[2]<<16) +((ub4)k[3]<<24));
///       b += (k[4] +((ub4)k[5]<<8) +((ub4)k[6]<<16) +((ub4)k[7]<<24));
///       c += (k[8] +((ub4)k[9]<<8) +((ub4)k[10]<<16)+((ub4)k[11]<<24));
///       mix(a,b,c);
///       k += 12; len -= 12;
///    }
///
///    /*------------------------------------- handle the last 11 bytes */
///    c += length;
///    switch(len)              /* all the case statements fall through */
///    {
///    case 11: c+=((ub4)k[10]<<24);
///    case 10: c+=((ub4)k[9]<<16);
///    case 9 : c+=((ub4)k[8]<<8);
///       /* the first byte of c is reserved for the length */
///    case 8 : b+=((ub4)k[7]<<24);
///    case 7 : b+=((ub4)k[6]<<16);
///    case 6 : b+=((ub4)k[5]<<8);
///    case 5 : b+=k[4];
///    case 4 : a+=((ub4)k[3]<<24);
///    case 3 : a+=((ub4)k[2]<<16);
///    case 2 : a+=((ub4)k[1]<<8);
///    case 1 : a+=k[0];
///      /* case 0: nothing left to add */
///    }
///    mix(a,b,c);
///    /*-------------------------------------------- report the result */
///    return c;
/// }
/// ```
pub fn lookup2(data: &[u8], initial: u32) -> u32 {
  let len = data.len();

  let mut a = Wrapping(START);
  let mut b = Wrapping(START);
  let mut c = Wrapping(initial);

  let mut i = 0;

  while (len - i) >= 12 {
    a += data[i] as u32
      + ((data[i + 1] as u32) << 8)
      + ((data[i + 2] as u32) << 16)
      + ((data[i + 3] as u32) << 24);
    b += data[i + 4] as u32
      + ((data[i + 5] as u32) << 8)
      + ((data[i + 6] as u32) << 16)
      + ((data[i + 7] as u32) << 24);
    c += data[i + 8] as u32
      + ((data[i + 9] as u32) << 8)
      + ((data[i + 10] as u32) << 16)
      + ((data[i + 11] as u32) << 24);
    (a, b, c) = mix(a, b, c);
    i += 12;
  }

  c += len as u32;

  let x = len - i;

  if x >= 11 {
    c += (data[i + 10] as u32) << 24
  }
  if x >= 10 {
    c += (data[i + 9] as u32) << 16
  }
  if x >= 9 {
    c += (data[i + 8] as u32) << 8
  }
  if x >= 8 {
    b += (data[i + 7] as u32) << 24
  }
  if x >= 7 {
    b += (data[i + 6] as u32) << 16
  }
  if x >= 6 {
    b += (data[i + 5] as u32) << 8
  }
  if x >= 5 {
    b += data[i + 4] as u32
  }
  if x >= 4 {
    a += (data[i + 3] as u32) << 24
  }
  if x >= 3 {
    a += (data[i + 2] as u32) << 16
  }
  if x >= 2 {
    a += (data[i + 1] as u32) << 8
  }
  if x >= 1 {
    a += data[i] as u32
  }

  (mix(a, b, c).2).0
}

fn mix(
  mut a: Wrapping<u32>,
  mut b: Wrapping<u32>,
  mut c: Wrapping<u32>,
) -> (Wrapping<u32>, Wrapping<u32>, Wrapping<u32>) {
  a -= b;
  a -= c;
  a ^= c >> 13;
  b -= c;
  b -= a;
  b ^= a << 8;
  c -= a;
  c -= b;
  c ^= b >> 13;
  a -= b;
  a -= c;
  a ^= c >> 12;
  b -= c;
  b -= a;
  b ^= a << 16;
  c -= a;
  c -= b;
  c ^= b >> 5;
  a -= b;
  a -= c;
  a ^= c >> 3;
  b -= c;
  b -= a;
  b ^= a << 10;
  c -= a;
  c -= b;
  c ^= b >> 15;
  (a, b, c)
}

#[cfg(test)]
mod tests {
  use std::num::Wrapping;

  use crate::lookup2;
  use crate::lookup2::mix;

  #[test]
  fn test_lookup2() {
    let in1 = "Yo mama is so fat that her bellybuttongs got an echo.".as_bytes();
    let in2 = "Yo mama is so stupid that she failed a survey.".as_bytes();

    assert_eq!(lookup2(in1, 0), 0x2CEB1226);
    assert_eq!(lookup2(in2, 0xDEADBEEF), 0xD1215833);
  }

  #[test]
  fn test_mix() {
    assert_eq!(
      mix(Wrapping(346972), Wrapping(5874), Wrapping(5287068)),
      (
        Wrapping(1151851378),
        Wrapping(1918881843),
        Wrapping(2302927392)
      )
    )
  }
}