bromberg_sl2/lib.rs
1/*!
2This is an implementation of the Tillich-Zémor-style hash function
3presented in the paper ["Navigating in the Cayley Graph of SL₂(𝔽ₚ)"
4](https://link.springer.com/article/10.1007%2Fs00233-015-9766-5) by
5Bromberg, Shpilrain, and Vdovina.
6
7> ### Warning
8>
9> This module is not produced by cryptography experts, but by
10> [some random guy](http://benwr.net). Furthermore, the algorithm
11> was published in 2017, and is itself not at all battle-tested. Only
12> use this library if you either (a) know what you're doing and have
13> read and understood our code, and/or (b) are building something that
14> does not rely heavily on the cryptographic properties of the hash
15> function.
16>
17> If you _are_ a cryptography expert, we welcome any bug reports or
18> pull requests! We also welcome them if you're not a cryptography
19> expert; this library is quite simple, and should be easy to grok
20> over a coffee with a copy of the paper linked above in hand.
21
22# What is this library for?
23
24This library implements a putatively-strong hash function H with the
25useful property that it gives a monoid homomorphism. This means there
26is a cheap operation `*` such that given strings `s1` and `s2`,
27`H(s1 ++ s2) = H(s1) * H(s2)`.
28
29This property is especially useful for applications where some very
30long string may be constructed via many different routes, but you'd
31nonetheless like to be able to quickly rule out unequal strings.
32
33It also allows you to hash _parts_ of your data as you acquire them,
34and then merge them later in whatever order is convenient. This allows
35for very flexible hashing schemes.
36
37H has some other cool properties, and is in some limited but
38potentially-useful sense "provably secure". See Bromberg et al. for
39details.
40
41# How to use this library
42
43This library provides the means to construct
44[`HashMatrix`](struct.HashMatrix.html)es, using [`hash`](fn.hash.html),
45which takes a slice of bytes. These hashes can be compared,
46or serialized to hex strings using
47[`to_hex`](struct.HashMatrix.html#method.to_hex).
48
49```
50use bromberg_sl2::*;
51assert_eq!(
52 hash("hello, world! It's fun to hash stuff!".as_ref()).to_hex(),
53 "01c5cf590d32654c87228c0d66441b200aec1439e54e724f05cd3c6c260634e565594b61988933e826e9705de22884ce007df0f733a371516ddd4ac9237f7a46");
54```
55
56Hashes may also be composed, using the `*` operator:
57
58```
59use bromberg_sl2::*;
60assert_eq!(
61 hash("hello, ".as_ref()) * hash("world!".as_ref()),
62 hash("hello, world!".as_ref())
63);
64```
65
66# Technical Details
67
68We use the A(2) and B(2) matrices as generators, and
69p = 2^127 - 1 as our prime order, for fast modular arithmetic.
70
71We have not yet attempted to seriously optimize this library, and
72performance is a secondary goal. As of right now our procedure is
73about 1/3 as fast as SHA3-512.
74
75We needed an architecture-agnostic cryptographic hash procedure with a
76monoid homomorphism respecting string concatenation, written in a
77low-level language. While there are
78[a](https://github.com/srijs/hwsl2-core)
79[few](https://github.com/nspcc-dev/tzhash)
80[implementations](https://github.com/phlegmaticprogrammer/tillich_zemor_hash)
81of related algorithms, e.g. the venerable [but broken
82](https://link.springer.com/chapter/10.1007/978-3-642-19574-7_20) Tillich-Zémor hash,
83from ["Hashing with SL₂"
84](https://link.springer.com/chapter/10.1007/3-540-48658-5_5),
85none of them fulfill these desiderata.
86*/
87
88#![cfg_attr(not(feature = "std"), no_std)]
89
90#[macro_use]
91extern crate alloc;
92
93pub use crate::hash_matrix::{constmatmul, HashMatrix};
94
95use crate::lookup_table::{init_wyde_lookups, BYTE_LOOKUPS, WYDE_LOOKUPS};
96
97pub use crate::hash_matrix::I;
98
99mod hash_matrix;
100mod lookup_table;
101
102#[inline(always)]
103fn get_val_for_two(acc: HashMatrix, bs: &[u8], wyde_lookups: &[HashMatrix]) -> HashMatrix {
104 let b0 = unsafe { *bs.get_unchecked(0) };
105 let b1 = unsafe { *bs.get_unchecked(1) };
106 let index = ((b0 as usize) << 8) | (b1 as usize);
107 // let i = u16::from_be_bytes([b0, b1]) as usize;
108 let value = unsafe { *wyde_lookups.get_unchecked(index) };
109 acc * value
110}
111
112#[inline(always)]
113fn get_val_for_one(acc: HashMatrix, bs: u8) -> HashMatrix {
114 acc * unsafe { *BYTE_LOOKUPS.get_unchecked(bs as usize) }
115}
116
117/// The main export of this library: Give me a byte
118/// stream and I'll give you a hash.
119#[must_use]
120#[inline]
121pub fn hash(bytes: &[u8]) -> HashMatrix {
122 let wyde_lookups = WYDE_LOOKUPS.get_or_init(init_wyde_lookups);
123
124 let mut acc = I;
125 let iter = bytes.chunks_exact(2);
126 let rem = iter.remainder();
127
128 for bs in iter {
129 acc = get_val_for_two(acc, bs, &wyde_lookups);
130 }
131
132 if rem.is_empty() {
133 acc
134 } else {
135 get_val_for_one(acc, unsafe { *rem.get_unchecked(0) })
136 }
137}
138
139#[must_use]
140#[inline]
141#[cfg(feature = "std")]
142/// Same as `hash` but computes the hash of the byte stream in parallel.
143///
144/// The number of threads used is set automatically but can be overridden using the
145/// `RAYON_NUM_THREADS` environment variable
146pub fn hash_par(bytes: &[u8]) -> HashMatrix {
147 use rayon::prelude::*;
148
149 let wyde_lookups = WYDE_LOOKUPS.get_or_init(init_wyde_lookups);
150
151 let iter = bytes.par_chunks_exact(2);
152 let rem = iter.remainder();
153
154 let acc = iter
155 .fold(|| I, |acc, bs| get_val_for_two(acc, bs, &wyde_lookups))
156 .reduce(
157 || I,
158 |mut acc, h| {
159 acc = acc * h;
160 acc
161 },
162 );
163
164 if rem.is_empty() {
165 acc
166 } else {
167 get_val_for_one(acc, unsafe { *rem.get_unchecked(0) })
168 }
169}
170
171/// This procedure implements the same hash function as `hash()`, but
172/// with a different performance tradeoff. The first time it's invoked,
173/// `hash` computes a 4MiB table of all the hashes for every pair of
174/// bytes, which are then used to double hashing speed. For applications
175/// that need to do a lot of hashing, this is nearly twice as fast as
176/// `hash()`, but it also requires much more memory and initialization
177/// time. As a rule of thumb, if you're going to hash less than 100KiB
178/// during your program's execution, you should probably use
179/// `hash_strict`.
180#[must_use]
181#[inline]
182pub fn hash_strict(bytes: &[u8]) -> HashMatrix {
183 let mut acc = I;
184 for b in bytes {
185 acc = acc * unsafe { *BYTE_LOOKUPS.get_unchecked(*b as usize) };
186 }
187 acc
188}
189
190/// Things that can be hashed using this crate.
191pub trait BrombergHashable {
192 fn bromberg_hash(&self) -> HashMatrix;
193}
194
195impl BrombergHashable for [u8] {
196 #[inline]
197 fn bromberg_hash(&self) -> HashMatrix {
198 hash(self)
199 }
200}
201
202impl<T: BrombergHashable> BrombergHashable for &T {
203 #[inline]
204 fn bromberg_hash(&self) -> HashMatrix {
205 (**self).bromberg_hash()
206 }
207}
208
209impl<T: BrombergHashable> BrombergHashable for &mut T {
210 #[inline]
211 fn bromberg_hash(&self) -> HashMatrix {
212 (**self).bromberg_hash()
213 }
214}
215
216impl<T: BrombergHashable> BrombergHashable for alloc::boxed::Box<T> {
217 #[inline]
218 fn bromberg_hash(&self) -> HashMatrix {
219 (**self).bromberg_hash()
220 }
221}
222
223impl<T: BrombergHashable> BrombergHashable for alloc::rc::Rc<T> {
224 #[inline]
225 fn bromberg_hash(&self) -> HashMatrix {
226 (**self).bromberg_hash()
227 }
228}