hash_ids/
lib.rs

1/*!
2# hash-ids
3
4A fast, dependency-free implementation for [hashids](https://hashids.org/).
5
6## Usage
7
8```rust
9fn main() {
10    let hash_ids = hash_ids::HashIds::builder()
11        .with_salt("Arbitrary string")
12        .finish();
13    assert_eq!("neHrCa", hash_ids.encode(&[1, 2, 3]));
14    match hash_ids.decode("neHrCa"){
15        Ok(decode)=>{ assert_eq!(vec![1, 2, 3], decode); }
16        Err(e)=>{ println!("{}",e); }
17    }
18}
19```
20*/
21
22use std::collections::VecDeque;
23
24const MIN_ALPHABET_LENGTH: usize = 16;
25const SEPARATOR_DIV: f32 = 3.5;
26const GUARD_DIV: f32 = 12.0;
27const DEFAULT_SEPARATORS: &str = "cfhistuCFHISTU";
28const DEFAULT_ALPHABET: &str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890";
29
30/// Error container for various errors
31#[derive(Debug)]
32pub enum Error {
33    AlphabetTooSmall,
34    ContainsSpace,
35    AlphabetNotUnique,
36    InvalidHash,
37    MissingLotteryChar,
38}
39
40impl std::fmt::Display for Error {
41    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
42        match self {
43            Error::AlphabetTooSmall => "Alphabet must contain at least 16 unique characters".fmt(f),
44            Error::ContainsSpace => "Alphabet may not contain spaces".fmt(f),
45            Error::AlphabetNotUnique => "Alphabet must contain unique characters".fmt(f),
46            Error::InvalidHash => "Invalid hash provided".fmt(f),
47            Error::MissingLotteryChar => "Hash is missing the lottery character".fmt(f),
48        }
49    }
50}
51
52impl std::error::Error for Error {}
53
54/// Builder for `HashIds`
55#[derive(Debug)]
56pub struct HashIdsBuilder {
57    salt: Vec<char>,
58    min_length: usize,
59}
60
61impl HashIdsBuilder {
62    /// Creates a new `HashIdsBuilder` instance with default values.
63    pub fn new() -> Self {
64        Self { salt: vec![], min_length: 0 }
65    }
66}
67
68/// Same as `HashIdsBuilder`, but with custom alphabet (which can fail)
69#[derive(Debug)]
70pub struct HashIdsBuilderWithCustomAlphabet {
71    inner: HashIdsBuilder,
72    alphabet: Vec<char>,
73}
74
75impl HashIdsBuilderWithCustomAlphabet {
76    /// Set the salt (arbitrary string) for the `HashIds`
77    pub fn with_salt(mut self, salt: &str) -> Self {
78        self.inner = self.inner.with_salt(salt);
79        self
80    }
81
82    /// Set the minimum length for the encoded string
83    pub fn with_min_length(mut self, min_length: usize) -> Self {
84        self.inner = self.inner.with_min_length(min_length);
85        self
86    }
87
88    /// Convert the builder to the finished `HashIds`
89    ///
90    /// Can fail if the custom alphabet won't work
91    pub fn finish(self) -> Result<HashIds, Error> {
92        let Self { inner: HashIdsBuilder { salt, min_length }, mut alphabet } = self;
93
94        let separators =
95            DEFAULT_SEPARATORS.chars().filter(|x| alphabet.contains(x)).collect::<Vec<_>>();
96
97        alphabet = alphabet.drain(..).filter(|x| !separators.contains(x)).collect();
98
99        alphabet = alphabet
100            .clone()
101            .into_iter()
102            .enumerate()
103            .filter(|(i, c)| alphabet.iter().position(|a| a == c) == Some(*i))
104            .map(|(_, c)| c)
105            .collect();
106
107        if alphabet.len() + separators.len() < MIN_ALPHABET_LENGTH {
108            return Err(Error::AlphabetTooSmall);
109        }
110
111        if alphabet.contains(&' ') {
112            return Err(Error::ContainsSpace);
113        }
114
115        if alphabet.len() != alphabet.iter().collect::<std::collections::HashSet<_>>().len() {
116            return Err(Error::AlphabetNotUnique);
117        }
118
119        Ok(HashIds { salt, min_length, alphabet, separators, guards: Vec::new() }.finish())
120    }
121}
122
123impl HashIdsBuilder {
124    /// Set the salt (arbitrary string) for the `HashIds`
125    pub fn with_salt(mut self, salt: &str) -> Self {
126        self.salt = salt.chars().collect();
127        self
128    }
129
130    /// Set the minimum length for the encoded string
131    pub fn with_min_length(mut self, min_length: usize) -> Self {
132        self.min_length = min_length;
133        self
134    }
135
136    /// Set the custom alphabet to use for encoding
137    pub fn with_alphabet(self, alphabet: &str) -> HashIdsBuilderWithCustomAlphabet {
138        HashIdsBuilderWithCustomAlphabet { inner: self, alphabet: alphabet.chars().collect() }
139    }
140
141    /// Convert the builder to the finished `HashIds`
142    pub fn finish(self) -> HashIds {
143        let Self { salt, min_length } = self;
144        HashIds {
145            salt,
146            min_length,
147            alphabet: DEFAULT_ALPHABET
148                .chars()
149                .filter(|x| !DEFAULT_SEPARATORS.contains(*x))
150                .collect(),
151            separators: DEFAULT_SEPARATORS.chars().collect(),
152            guards: Vec::new(),
153        }
154        .finish()
155    }
156}
157
158/// The encoder/decoder container
159#[derive(Debug, Clone)]
160pub struct HashIds {
161    salt: Vec<char>,
162    min_length: usize,
163    alphabet: Vec<char>,
164    separators: Vec<char>,
165    guards: Vec<char>,
166}
167
168impl HashIds {
169    /// Create a new `HashIdsBuilder`
170    pub fn builder() -> HashIdsBuilder {
171        HashIdsBuilder::new()
172    }
173
174    /// Completes the building process and adjusts alphabet, separators, and guards based on salt.
175    fn finish(mut self) -> Self {
176        let min_separators = Self::index_from_ratio(self.alphabet.len(), SEPARATOR_DIV);
177
178        if let Some(num_missing_separators) = min_separators.checked_sub(self.separators.len()) {
179            if num_missing_separators > 0 {
180                let mut new_alphabet = self.alphabet.split_off(num_missing_separators);
181                std::mem::swap(&mut self.alphabet, &mut new_alphabet);
182                self.separators.append(&mut new_alphabet);
183            }
184        }
185
186        self.alphabet = Self::reorder(&self.alphabet, &self.salt);
187        self.separators = Self::reorder(&self.separators, &self.salt);
188
189        let num_guards = Self::index_from_ratio(self.alphabet.len(), GUARD_DIV);
190
191        if self.alphabet.len() < 3 {
192            self.guards = self.separators.split_off(num_guards);
193            std::mem::swap(&mut self.separators, &mut self.guards);
194        } else {
195            self.guards = self.alphabet.split_off(num_guards);
196            std::mem::swap(&mut self.alphabet, &mut self.guards);
197        }
198
199        self
200    }
201
202    /// Calculates an index for a given ratio.
203    fn index_from_ratio(dividend: usize, divisor: f32) -> usize {
204        (dividend as f32 / divisor).ceil() as _
205    }
206
207    /// Reorders characters in a string based on a salt value.
208    fn reorder(string: &[char], salt: &[char]) -> Vec<char> {
209        let mut out = string.to_vec();
210
211        if salt.is_empty() {
212            return out;
213        }
214
215        let mut int_sum = 0;
216        let mut index = 0;
217
218        for i in (1..string.len()).rev() {
219            let int = u32::from(salt[index]) as usize;
220            int_sum += int;
221            let j = (int + index + int_sum) % i;
222            out.swap(i, j);
223            index = (index + 1) % salt.len();
224        }
225
226        out
227    }
228
229    /// Hashes a number using the given alphabet.
230    fn hash(mut number: usize, alphabet: &[char]) -> Vec<char> {
231        let mut hashed = VecDeque::new();
232        loop {
233            hashed.push_front(alphabet[number % alphabet.len()]);
234            number /= alphabet.len();
235            if number == 0 {
236                break;
237            }
238        }
239        hashed.into_iter().collect()
240    }
241
242    /// Unhashes characters back into a number using the given alphabet.
243    fn unhash<I: Iterator<Item = char>>(hashed: I, alphabet: &[char]) -> Option<u64> {
244        let mut number: u64 = 0;
245
246        for c in hashed {
247            let pos = alphabet.iter().position(|y| y == &c)? as u64;
248            number *= alphabet.len() as u64;
249            number += pos;
250        }
251
252        Some(number)
253    }
254
255    /// Splits a string based on given splitters.
256    fn split<I: Iterator<Item = char>>(string: I, splitters: &[char]) -> Vec<Vec<char>> {
257        let mut parts = Vec::new();
258        let mut buf = Vec::new();
259        for c in string {
260            if splitters.contains(&c) {
261                parts.push(buf);
262                buf = Vec::new();
263            } else {
264                buf.push(c);
265            }
266        }
267        parts.push(buf);
268        parts
269    }
270
271    /// Encode a slice of numbers into a string
272    pub fn encode(&self, vals: &[u64]) -> String {
273        if vals.is_empty() {
274            return String::new();
275        }
276
277        let mut alphabet = self.alphabet.clone();
278
279        let vals_hash =
280            vals.iter().enumerate().fold(0, |acc, (i, x)| acc + ((*x as usize) % (i + 100)));
281
282        let lottery = self.alphabet[vals_hash % self.alphabet.len()];
283        let mut encoded = vec![lottery];
284
285        for (i, val) in vals.iter().map(|x| *x as usize).enumerate() {
286            let alphabet_salt = std::iter::once(lottery)
287                .chain(self.salt.iter().copied())
288                .chain(alphabet.iter().copied())
289                .take(alphabet.len())
290                .collect::<Vec<_>>();
291
292            alphabet = Self::reorder(&alphabet, &alphabet_salt);
293            let mut last = Self::hash(val, &alphabet);
294            let val = val % (u32::from(last[0]) as usize + i);
295            encoded.append(&mut last);
296            encoded.push(self.separators[val % self.separators.len()]);
297        }
298
299        let _ = encoded.pop();
300
301        if encoded.len() >= self.min_length {
302            encoded.into_iter().collect::<String>()
303        } else {
304            let mut encoded = encoded.into_iter().collect::<VecDeque<_>>();
305
306            let mut guard_index = (vals_hash + u32::from(encoded[0]) as usize) % self.guards.len();
307            encoded.push_front(self.guards[guard_index]);
308
309            if encoded.len() < self.min_length {
310                guard_index = (vals_hash + u32::from(encoded[2]) as usize) % self.guards.len();
311                encoded.push_back(self.guards[guard_index]);
312            }
313
314            let split_at = alphabet.len() / 2;
315
316            while encoded.len() < self.min_length {
317                alphabet = Self::reorder(&alphabet, &alphabet);
318
319                for c in alphabet[split_at..].iter().copied().rev() {
320                    encoded.push_front(c);
321                }
322                for c in alphabet[..split_at].iter().copied() {
323                    encoded.push_back(c);
324                }
325                if let Some(excess) = encoded.len().checked_sub(self.min_length) {
326                    if excess > 0 {
327                        let from_index = excess / 2;
328                        return encoded
329                            .drain(from_index..from_index + self.min_length)
330                            .collect::<String>();
331                    }
332                }
333            }
334
335            encoded.into_iter().collect::<String>()
336        }
337    }
338
339    /// Decode a string into a `Vec` of numbers
340    pub fn decode(&self, hash_str: &str) -> Result<Vec<u64>, Error> {
341        if hash_str.is_empty() {
342            return Ok(vec![]);
343        }
344
345        let mut alphabet = self.alphabet.clone();
346
347        let mut parts = Self::split(hash_str.chars(), &self.guards);
348
349        let mut hash_str =
350            if parts.len() >= 2 && parts.len() <= 3 { parts.remove(1) } else { parts.remove(0) };
351
352        let lottery = hash_str.get(0).ok_or(Error::MissingLotteryChar)?.clone();
353        hash_str.remove(0);
354
355        let parts = Self::split(hash_str.iter().copied(), &self.separators);
356
357        let mut out = Vec::with_capacity(parts.len());
358
359        for part in parts {
360            let alphabet_salt = std::iter::once(lottery)
361                .chain(self.salt.iter().copied())
362                .chain(alphabet.iter().copied())
363                .take(alphabet.len())
364                .collect::<Vec<_>>();
365            alphabet = Self::reorder(&alphabet, &alphabet_salt);
366
367            if let Some(number) = Self::unhash(part.iter().copied(), &alphabet) {
368                out.push(number)
369            } else {
370                return Err(Error::InvalidHash);
371            }
372        }
373
374        Ok(out)
375    }
376}
377
378#[cfg(test)]
379mod tests {
380    use super::*;
381
382    #[test]
383    fn test_small_alphabet_with_no_repeating_characters() {
384        assert!(HashIds::builder().with_alphabet("abcdefghijklmno").finish().is_err());
385    }
386
387    #[test]
388    fn test_small_alphabet_with_repeating_characters() {
389        assert!(HashIds::builder().with_alphabet("abcdecfghijklbmnoa").finish().is_err());
390    }
391
392    #[test]
393    fn test_empty() {
394        let hash_ids = HashIds::builder().finish();
395        assert_eq!("", hash_ids.encode(&[]));
396        assert_eq!(Vec::<u64>::new(), hash_ids.decode("").unwrap())
397    }
398
399    #[test]
400    fn test_default_salt() {
401        let hash_ids = HashIds::builder().finish();
402        assert_eq!("o2fXhV", hash_ids.encode(&[1, 2, 3]));
403        assert_eq!(vec![1, 2, 3], hash_ids.decode("o2fXhV").unwrap());
404    }
405
406    #[test]
407    fn test_single_number() {
408        let hash_ids = HashIds::builder().finish();
409
410        assert_eq!("j0gW", hash_ids.encode(&[12345]));
411        assert_eq!("jR", hash_ids.encode(&[1]));
412        assert_eq!("Lw", hash_ids.encode(&[22]));
413        assert_eq!("Z0E", hash_ids.encode(&[333]));
414        assert_eq!("w0rR", hash_ids.encode(&[9999]));
415
416        assert_eq!(vec![12345], hash_ids.decode("j0gW").unwrap());
417        assert_eq!(vec![1], hash_ids.decode("jR").unwrap());
418        assert_eq!(vec![22], hash_ids.decode("Lw").unwrap());
419        assert_eq!(vec![333], hash_ids.decode("Z0E").unwrap());
420        assert_eq!(vec![9999], hash_ids.decode("w0rR").unwrap());
421    }
422
423    #[test]
424    fn test_multiple_numbers() {
425        let hash_ids = HashIds::builder().finish();
426
427        assert_eq!("vJvi7On9cXGtD", hash_ids.encode(&[683, 94108, 123, 5]));
428        assert_eq!("o2fXhV", hash_ids.encode(&[1, 2, 3]));
429        assert_eq!("xGhmsW", hash_ids.encode(&[2, 4, 6]));
430        assert_eq!("3lKfD", hash_ids.encode(&[99, 25]));
431
432        assert_eq!(vec![683, 94108, 123, 5], hash_ids.decode("vJvi7On9cXGtD").unwrap());
433        assert_eq!(vec![1, 2, 3], hash_ids.decode("o2fXhV").unwrap());
434        assert_eq!(vec![2, 4, 6], hash_ids.decode("xGhmsW").unwrap());
435        assert_eq!(vec![99, 25], hash_ids.decode("3lKfD").unwrap());
436    }
437
438    #[test]
439    fn test_salt() {
440        let hash_ids = HashIds::builder().with_salt("Arbitrary string").finish();
441
442        assert_eq!("QWyf8yboH7KT2", hash_ids.encode(&[683, 94108, 123, 5]));
443        assert_eq!("neHrCa", hash_ids.encode(&[1, 2, 3]));
444        assert_eq!("LRCgf2", hash_ids.encode(&[2, 4, 6]));
445        assert_eq!("JOMh1", hash_ids.encode(&[99, 25]));
446
447        assert_eq!(vec![683, 94108, 123, 5], hash_ids.decode("QWyf8yboH7KT2").unwrap());
448        assert_eq!(vec![1, 2, 3], hash_ids.decode("neHrCa").unwrap());
449        assert_eq!(vec![2, 4, 6], hash_ids.decode("LRCgf2").unwrap());
450        assert_eq!(vec![99, 25], hash_ids.decode("JOMh1").unwrap());
451    }
452
453    #[test]
454    fn test_alphabet() {
455        let hash_ids = HashIds::builder().with_alphabet(r##"!"#%&',-/0123456789:;<=>ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz~"##).finish().unwrap();
456
457        assert_eq!("_nJUNTVU3", hash_ids.encode(&[2839, 12, 32, 5]));
458        assert_eq!("7xfYh2", hash_ids.encode(&[1, 2, 3]));
459        assert_eq!("Z6R>", hash_ids.encode(&[23832]));
460        assert_eq!("AYyIB", hash_ids.encode(&[99, 25]));
461
462        assert_eq!(vec![2839, 12, 32, 5], hash_ids.decode("_nJUNTVU3").unwrap());
463        assert_eq!(vec![1, 2, 3], hash_ids.decode("7xfYh2").unwrap());
464        assert_eq!(vec![23832], hash_ids.decode("Z6R>").unwrap());
465        assert_eq!(vec![99, 25], hash_ids.decode("AYyIB").unwrap());
466    }
467
468    #[test]
469    fn test_min_length() {
470        let hash_ids = HashIds::builder().with_min_length(25).finish();
471
472        assert_eq!("pO3K69b86jzc6krI416enr2B5", hash_ids.encode(&[7452, 2967, 21401]));
473        assert_eq!("gyOwl4B97bo2fXhVaDR0Znjrq", hash_ids.encode(&[1, 2, 3]));
474        assert_eq!("Nz7x3VXyMYerRmWeOBQn6LlRG", hash_ids.encode(&[6097]));
475        assert_eq!("k91nqP3RBe3lKfDaLJrvy8XjV", hash_ids.encode(&[99, 25]));
476
477        assert_eq!(vec![7452, 2967, 21401], hash_ids.decode("pO3K69b86jzc6krI416enr2B5").unwrap());
478        assert_eq!(vec![1, 2, 3], hash_ids.decode("gyOwl4B97bo2fXhVaDR0Znjrq").unwrap());
479        assert_eq!(vec![6097], hash_ids.decode("Nz7x3VXyMYerRmWeOBQn6LlRG").unwrap());
480        assert_eq!(vec![99, 25], hash_ids.decode("k91nqP3RBe3lKfDaLJrvy8XjV").unwrap());
481    }
482
483    #[test]
484    fn test_all_parameters() {
485        let hash_ids = HashIds::builder()
486            .with_salt("arbitrary salt")
487            .with_alphabet("abcdefghijklmnopqrstuvwxyz")
488            .with_min_length(16)
489            .finish()
490            .unwrap();
491
492        assert_eq!("wygqxeunkatjgkrw", hash_ids.encode(&[7452, 2967, 21401]));
493        assert_eq!("pnovxlaxuriowydb", hash_ids.encode(&[1, 2, 3]));
494        assert_eq!("jkbgxljrjxmlaonp", hash_ids.encode(&[60125]));
495        assert_eq!("erdjpwrgouoxlvbx", hash_ids.encode(&[99, 25]));
496
497        assert_eq!(vec![7452, 2967, 21401], hash_ids.decode("wygqxeunkatjgkrw").unwrap());
498        assert_eq!(vec![1, 2, 3], hash_ids.decode("pnovxlaxuriowydb").unwrap());
499        assert_eq!(vec![60125], hash_ids.decode("jkbgxljrjxmlaonp").unwrap());
500        assert_eq!(vec![99, 25], hash_ids.decode("erdjpwrgouoxlvbx").unwrap());
501    }
502
503    #[test]
504    fn test_alphabet_without_standard_separators() {
505        let hash_ids = HashIds::builder()
506            .with_alphabet("abdegjklmnopqrvwxyzABDEGJKLMNOPQRVWXYZ1234567890")
507            .finish()
508            .unwrap();
509
510        assert_eq!("X50Yg6VPoAO4", hash_ids.encode(&[7452, 2967, 21401]));
511        assert_eq!("GAbDdR", hash_ids.encode(&[1, 2, 3]));
512        assert_eq!("5NMPD", hash_ids.encode(&[60125]));
513        assert_eq!("yGya5", hash_ids.encode(&[99, 25]));
514
515        assert_eq!(vec![7452, 2967, 21401], hash_ids.decode("X50Yg6VPoAO4").unwrap());
516        assert_eq!(vec![1, 2, 3], hash_ids.decode("GAbDdR").unwrap());
517        assert_eq!(vec![60125], hash_ids.decode("5NMPD").unwrap());
518        assert_eq!(vec![99, 25], hash_ids.decode("yGya5").unwrap());
519    }
520
521    #[test]
522    fn test_alphabet_with_two_standard_separators() {
523        let hash_ids = HashIds::builder()
524            .with_alphabet("abdegjklmnopqrvwxyzABDEGJKLMNOPQRVWXYZ1234567890uC")
525            .finish()
526            .unwrap();
527
528        assert_eq!("GJNNmKYzbPBw", hash_ids.encode(&[7452, 2967, 21401]));
529        assert_eq!("DQCXa4", hash_ids.encode(&[1, 2, 3]));
530        assert_eq!("38V1D", hash_ids.encode(&[60125]));
531        assert_eq!("373az", hash_ids.encode(&[99, 25]));
532
533        assert_eq!(vec![7452, 2967, 21401], hash_ids.decode("GJNNmKYzbPBw").unwrap());
534        assert_eq!(vec![1, 2, 3], hash_ids.decode("DQCXa4").unwrap());
535        assert_eq!(vec![60125], hash_ids.decode("38V1D").unwrap());
536        assert_eq!(vec![99, 25], hash_ids.decode("373az").unwrap());
537    }
538
539    #[test]
540    fn test_long() {
541        let hash_ids = HashIds::builder().with_salt("arbitrary salt").finish();
542
543        let up_to_100 = (1..=100).collect::<Vec<_>>();
544
545        assert_eq!("GaHMFdtBf0ceClsgiVIjSrUKh1TyupHXFwt5fQcXCwspilIvSYUQhoT2u0HMF5tVfVc9CEsYiqI6SDUdhyTauBHPaF66t8pfGXcnoC2Vs0ei1YIy8SZ2UPehlyTKZuYJHQyF6wtZafR7c52Cn6skLigpIbGSD7UVkhyZT9xukeHBnFR1tJ2f2ocnVCkVsEQia6IBbSDEUX3hB6TaBuDbHxkFd7tykfrjc55Crjs2GigrIx5SpKUKjhVRTdQuX7H9K", hash_ids.encode(&up_to_100));
546        assert_eq!(up_to_100, hash_ids.decode("GaHMFdtBf0ceClsgiVIjSrUKh1TyupHXFwt5fQcXCwspilIvSYUQhoT2u0HMF5tVfVc9CEsYiqI6SDUdhyTauBHPaF66t8pfGXcnoC2Vs0ei1YIy8SZ2UPehlyTKZuYJHQyF6wtZafR7c52Cn6skLigpIbGSD7UVkhyZT9xukeHBnFR1tJ2f2ocnVCkVsEQia6IBbSDEUX3hB6TaBuDbHxkFd7tykfrjc55Crjs2GigrIx5SpKUKjhVRTdQuX7H9K").unwrap());
547    }
548}