bromberg_sl2/
lib.rs

1/*!
2This is an implementation of the Tillich-Zémor-style hash function
3presented in the paper ["Navigating in the Cayley Graph of SL₂(𝔽ₚ)"
4](https://link.springer.com/article/10.1007%2Fs00233-015-9766-5) by
5Bromberg, Shpilrain, and Vdovina.
6
7> ### Warning
8>
9> This module is not produced by cryptography experts, but by
10> [some random guy](http://benwr.net). Furthermore, the algorithm
11> was published in 2017, and is itself not at all battle-tested. Only
12> use this library if you either (a) know what you're doing and have
13> read and understood our code, and/or (b) are building something that
14> does not rely heavily on the cryptographic properties of the hash
15> function.
16>
17> If you _are_ a cryptography expert, we welcome any bug reports or
18> pull requests! We also welcome them if you're not a cryptography
19> expert; this library is quite simple, and should be easy to grok
20> over a coffee with a copy of the paper linked above in hand.
21
22# What is this library for?
23
24This library implements a putatively-strong hash function H with the
25useful property that it gives a monoid homomorphism. This means there
26is a cheap operation `*` such that given strings `s1` and `s2`,
27`H(s1 ++ s2) = H(s1) * H(s2)`.
28
29This property is especially useful for applications where some very
30long string may be constructed via many different routes, but you'd
31nonetheless like to be able to quickly rule out unequal strings.
32
33It also allows you to hash _parts_ of your data as you acquire them,
34and then merge them later in whatever order is convenient. This allows
35for very flexible hashing schemes.
36
37H has some other cool properties, and is in some limited but
38potentially-useful sense "provably secure". See Bromberg et al. for
39details.
40
41# How to use this library
42
43This library provides the means to construct
44[`HashMatrix`](struct.HashMatrix.html)es, using [`hash`](fn.hash.html),
45which takes a slice of bytes. These hashes can be compared,
46or serialized to hex strings using
47[`to_hex`](struct.HashMatrix.html#method.to_hex).
48
49```
50use bromberg_sl2::*;
51assert_eq!(
52  hash("hello, world! It's fun to hash stuff!".as_ref()).to_hex(),
53  "01c5cf590d32654c87228c0d66441b200aec1439e54e724f05cd3c6c260634e565594b61988933e826e9705de22884ce007df0f733a371516ddd4ac9237f7a46");
54```
55
56Hashes may also be composed, using the `*` operator:
57
58```
59use bromberg_sl2::*;
60assert_eq!(
61  hash("hello, ".as_ref()) * hash("world!".as_ref()),
62  hash("hello, world!".as_ref())
63);
64```
65
66# Technical Details
67
68We use the A(2) and B(2) matrices as generators, and
69p = 2^127 - 1 as our prime order, for fast modular arithmetic.
70
71We have not yet attempted to seriously optimize this library, and
72performance is a secondary goal. As of right now our procedure is
73about 1/3 as fast as SHA3-512.
74
75We needed an architecture-agnostic cryptographic hash procedure with a
76monoid homomorphism respecting string concatenation, written in a
77low-level language. While there are
78[a](https://github.com/srijs/hwsl2-core)
79[few](https://github.com/nspcc-dev/tzhash)
80[implementations](https://github.com/phlegmaticprogrammer/tillich_zemor_hash)
81of related algorithms, e.g. the venerable [but broken
82](https://link.springer.com/chapter/10.1007/978-3-642-19574-7_20) Tillich-Zémor hash,
83from ["Hashing with SL₂"
84](https://link.springer.com/chapter/10.1007/3-540-48658-5_5),
85none of them fulfill these desiderata.
86*/
87
88#![cfg_attr(not(feature = "std"), no_std)]
89
90#[macro_use]
91extern crate alloc;
92
93pub use crate::hash_matrix::{constmatmul, HashMatrix};
94
95use crate::lookup_table::{init_wyde_lookups, BYTE_LOOKUPS, WYDE_LOOKUPS};
96
97pub use crate::hash_matrix::I;
98
99mod hash_matrix;
100mod lookup_table;
101
102#[inline(always)]
103fn get_val_for_two(acc: HashMatrix, bs: &[u8], wyde_lookups: &[HashMatrix]) -> HashMatrix {
104    let b0 = unsafe { *bs.get_unchecked(0) };
105    let b1 = unsafe { *bs.get_unchecked(1) };
106    let index = ((b0 as usize) << 8) | (b1 as usize);
107    // let i = u16::from_be_bytes([b0, b1]) as usize;
108    let value = unsafe { *wyde_lookups.get_unchecked(index) };
109    acc * value
110}
111
112#[inline(always)]
113fn get_val_for_one(acc: HashMatrix, bs: u8) -> HashMatrix {
114    acc * unsafe { *BYTE_LOOKUPS.get_unchecked(bs as usize) }
115}
116
117/// The main export of this library: Give me a byte
118/// stream and I'll give you a hash.
119#[must_use]
120#[inline]
121pub fn hash(bytes: &[u8]) -> HashMatrix {
122    let wyde_lookups = WYDE_LOOKUPS.get_or_init(init_wyde_lookups);
123
124    let mut acc = I;
125    let iter = bytes.chunks_exact(2);
126    let rem = iter.remainder();
127
128    for bs in iter {
129        acc = get_val_for_two(acc, bs, &wyde_lookups);
130    }
131
132    if rem.is_empty() {
133        acc
134    } else {
135        get_val_for_one(acc, unsafe { *rem.get_unchecked(0) })
136    }
137}
138
139#[must_use]
140#[inline]
141#[cfg(feature = "std")]
142/// Same as `hash` but computes the hash of the byte stream in parallel.
143///
144/// The number of threads used is set automatically but can be overridden using the
145/// `RAYON_NUM_THREADS` environment variable
146pub fn hash_par(bytes: &[u8]) -> HashMatrix {
147    use rayon::prelude::*;
148
149    let wyde_lookups = WYDE_LOOKUPS.get_or_init(init_wyde_lookups);
150
151    let iter = bytes.par_chunks_exact(2);
152    let rem = iter.remainder();
153
154    let acc = iter
155        .fold(|| I, |acc, bs| get_val_for_two(acc, bs, &wyde_lookups))
156        .reduce(
157            || I,
158            |mut acc, h| {
159                acc = acc * h;
160                acc
161            },
162        );
163
164    if rem.is_empty() {
165        acc
166    } else {
167        get_val_for_one(acc, unsafe { *rem.get_unchecked(0) })
168    }
169}
170
171/// This procedure implements the same hash function as `hash()`, but
172/// with a different performance tradeoff. The first time it's invoked,
173/// `hash` computes a 4MiB table of all the hashes for every pair of
174/// bytes, which are then used to double hashing speed. For applications
175/// that need to do a lot of hashing, this is nearly twice as fast as
176/// `hash()`, but it also requires much more memory and initialization
177/// time. As a rule of thumb, if you're going to hash less than 100KiB
178/// during your program's execution, you should probably use
179/// `hash_strict`.
180#[must_use]
181#[inline]
182pub fn hash_strict(bytes: &[u8]) -> HashMatrix {
183    let mut acc = I;
184    for b in bytes {
185        acc = acc * unsafe { *BYTE_LOOKUPS.get_unchecked(*b as usize) };
186    }
187    acc
188}
189
190/// Things that can be hashed using this crate.
191pub trait BrombergHashable {
192    fn bromberg_hash(&self) -> HashMatrix;
193}
194
195impl BrombergHashable for [u8] {
196    #[inline]
197    fn bromberg_hash(&self) -> HashMatrix {
198        hash(self)
199    }
200}
201
202impl<T: BrombergHashable> BrombergHashable for &T {
203    #[inline]
204    fn bromberg_hash(&self) -> HashMatrix {
205        (**self).bromberg_hash()
206    }
207}
208
209impl<T: BrombergHashable> BrombergHashable for &mut T {
210    #[inline]
211    fn bromberg_hash(&self) -> HashMatrix {
212        (**self).bromberg_hash()
213    }
214}
215
216impl<T: BrombergHashable> BrombergHashable for alloc::boxed::Box<T> {
217    #[inline]
218    fn bromberg_hash(&self) -> HashMatrix {
219        (**self).bromberg_hash()
220    }
221}
222
223impl<T: BrombergHashable> BrombergHashable for alloc::rc::Rc<T> {
224    #[inline]
225    fn bromberg_hash(&self) -> HashMatrix {
226        (**self).bromberg_hash()
227    }
228}