tiny_id/
lib.rs

1#![doc = include_str!("../README.md")]
2
3mod lcm;
4
5use lcm::LinearCongruentMultiplier;
6use rand_chacha::ChaCha12Rng;
7
8#[cfg(feature = "getrandom")]
9use rand_chacha::rand_core::SeedableRng;
10
11#[cfg(feature = "serde")]
12use serde::{Deserialize, Serialize};
13
14use rand::Rng;
15
16/// Stores the state required to generate short codes, and implements short code generation.
17///
18/// ```
19/// let mut generator = tiny_id::ShortCodeGenerator::new_lowercase_alphanumeric(5);
20/// let result: String = generator.next_string();
21/// assert_eq!(5, result.len());
22/// ```
23#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))]
24#[derive(Clone, Debug)]
25pub struct ShortCodeGenerator<T: Copy> {
26    lcm: LinearCongruentMultiplier,
27    offset: u64,
28    alphabet: Vec<T>,
29    length: u32,
30    exhaustion_strategy: ExhaustionStrategy,
31    
32    /// Random number generator used to seed future LCMs if ExhaustionStrategy is
33    /// ExtendLength. For other exhaustion strategies, it is set but never used because
34    /// the initial LCM is never replaced.
35    rng: Option<ChaCha12Rng>,
36    
37    /// Skip is used to enable partitioning. It forces the generator to skip
38    /// over the given number of values between generated codes, enabling
39    /// other partitions to use those codes.
40    skip: Option<u32>,
41    
42    /// When skip is in use, we do not want to skip the first value generated
43    /// by an rng, so skip_after_next is initially false. When the first random
44    /// value is generated, it is set to true, enabling the skip before subsequent
45    /// random generations.
46    #[cfg_attr(feature = "serialize", serde(default))]
47    skip_before_next: bool,
48}
49
50impl ShortCodeGenerator<char> {
51    /// Create a short code generator using numeric digits.
52    #[cfg(feature = "getrandom")]
53    pub fn new_numeric(length: usize) -> ShortCodeGenerator<char> {
54        Self::with_alphabet("0123456789".chars().collect(), length)
55    }
56
57    /// Create a short code generator using lowercase alphanumeric characters.
58    #[cfg(feature = "getrandom")]
59    pub fn new_lowercase_alphanumeric(length: usize) -> Self {
60        Self::with_alphabet(
61            "0123456789abcdefghijklmnopqrstuvwxyz".chars().collect(),
62            length,
63        )
64    }
65
66    /// Create a short code generator using upper and lowercase alphanumeric characters.
67    #[cfg(feature = "getrandom")]
68    pub fn new_alphanumeric(length: usize) -> Self {
69        Self::with_alphabet(
70            "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
71                .chars()
72                .collect(),
73            length,
74        )
75    }
76
77    /// Create a short code generator using uppercase characters.
78    #[cfg(feature = "getrandom")]
79    pub fn new_uppercase(length: usize) -> Self {
80        Self::with_alphabet("ABCDEFGHIJKLMNOPQRSTUVWXYZ".chars().collect(), length)
81    }
82
83    /// Return the next short code, represented as a string.
84    /// All `next_*` calls are equivalent to each other in terms of the
85    /// resulting state of self.
86    pub fn next_string(&mut self) -> String {
87        self.next_vec().into_iter().collect()
88    }
89}
90
91impl<T: Copy> ShortCodeGenerator<T> {
92    pub fn into_partitioned_generators(self, generators: u32) -> Vec<Self> {
93        (0..generators).map(
94            move |offset| {
95                let mut gen = self.clone();
96                if gen.skip.is_some() {
97                    panic!("Can't use into_partitioned_generators on a generator that is already parallel.");
98                }
99
100                for _ in 0..offset {
101                    gen.next_int();
102                }
103                gen.skip_before_next = false;
104                gen.skip = Some(generators - 1);
105
106                gen
107            }
108        ).collect()
109    }
110
111    /// Create a short code generator using a given alphabet, using the given
112    /// ChaCha12Rng random number generator.
113    pub fn with_alphabet_and_rng(alphabet: Vec<T>, length: usize, mut rng: ChaCha12Rng) -> Self {
114        use lcm::generate_a;
115
116        let m_base = alphabet.len() as u32;
117        let m = (m_base as u64).pow(length as u32);
118        let a = generate_a(m_base) as u64;
119        let lcm_seed = rng.gen_range(0..m) as u64;
120        let offset = rng.gen_range(0..m) as u64;
121
122        Self {
123            alphabet,
124            lcm: LinearCongruentMultiplier::new(lcm_seed, m, 1, a),
125            offset,
126            length: length as u32,
127            exhaustion_strategy: ExhaustionStrategy::default(),
128            rng: Some(rng),
129            skip: None,
130            skip_before_next: false,
131        }
132    }
133
134    /// Create a short code generator using a given alphabet.
135    #[cfg(feature = "getrandom")]
136    pub fn with_alphabet(alphabet: Vec<T>, length: usize) -> Self {
137        let mut seed: [u8; 32] = Default::default();
138        getrandom::getrandom(&mut seed).expect("Error getting entropy.");
139        let rng = ChaCha12Rng::from_seed(seed);
140        Self::with_alphabet_and_rng(alphabet, length, rng)
141    }
142
143    fn step(&mut self) -> u64 {
144        if self.lcm.exhausted() {
145            match self.exhaustion_strategy {
146                ExhaustionStrategy::Cycle => {}
147                ExhaustionStrategy::Panic => panic!("Exhausted."),
148                ExhaustionStrategy::IncreaseLength => {
149                    let rng = if let Some(rng) = self.rng.clone() {
150                        rng
151                    } else {
152                        #[cfg(feature = "getrandom")]
153                        {
154                            let mut seed: [u8; 32] = Default::default();
155                            getrandom::getrandom(&mut seed).expect("Error getting entropy.");
156                            ChaCha12Rng::from_seed(seed)
157                        }
158
159                        #[cfg(not(feature = "getrandom"))]
160                        panic!("Need crate feature getrandom to increase the length of a pre-0.1.4 ShortCodeGenerator. See https://github.com/paulgb/tiny_id/issues/2")
161                    };
162
163                    // These values of self are initialized by with_alphabet_and_rng, so we preserve them
164                    // on the stack and overwrite them.
165                    let skip = self.skip;
166                    let skip_before_next = self.skip_before_next;
167
168                    *self = ShortCodeGenerator::with_alphabet_and_rng(
169                        core::mem::take(&mut self.alphabet),
170                        self.length as usize + 1,
171                        rng,
172                    );
173
174                    self.skip = skip;
175                    self.skip_before_next = skip_before_next;
176                }
177            }
178        }
179        self.lcm.next()
180    }
181
182    /// Return the next short code, represented as an integer.
183    /// All `next_*` calls are equivalent to each other in terms of the
184    /// resulting state of self.
185    pub fn next_int(&mut self) -> u64 {
186        if self.skip_before_next {
187            for _ in 0..self.skip.unwrap_or_default() {
188                println!("h0");
189                self.step();
190            }    
191        } else {
192            self.skip_before_next = true;
193        }
194
195        let mut result = self.step();
196        result = (result + self.offset) % self.lcm.m;
197
198        result
199    }
200
201    /// Deprecated alias for [`ShortCodeGenerator::next_vec`].
202    #[deprecated(
203        since = "0.1.4",
204        note = "Deprecated to avoid confusion with Iterator::next. Use next_vec instead."
205    )]
206    pub fn next(&mut self) -> Vec<T> {
207        self.next_vec()
208    }
209
210    /// Return the next short code, represented as a vector.
211    /// All `next_*` calls are equivalent to each other in terms of the
212    /// resulting state of self.
213    pub fn next_vec(&mut self) -> Vec<T> {
214        let mut result = Vec::with_capacity(self.length as usize);
215        let alphabet_size = self.alphabet.len() as u64;
216        let mut value = self.next_int();
217
218        for _ in 0..self.length {
219            result.push(self.alphabet[(value % alphabet_size) as usize]);
220            value /= alphabet_size;
221        }
222
223        result
224    }
225
226    /// Set the exhaustion strategy of this short code generator. Preserves
227    /// other state.
228    pub fn exhaustion_strategy(mut self, strategy: ExhaustionStrategy) -> Self {
229        self.exhaustion_strategy = strategy;
230        self
231    }
232}
233
234/// Determines what happens when all codes (for a given alphabet and length) have
235/// been exhausted.
236#[derive(Clone, Copy, Debug)]
237#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
238pub enum ExhaustionStrategy {
239    /// Repeat the sequences of short codes, starting with the first one.
240    /// This guarantees a collision if codes live indefinitely, but can be useful
241    /// when codes are used temporarily as it ensures that a code is never reused
242    /// until *all other possible codes* have been used between usages.
243    Cycle,
244
245    /// Increase the length of the sequence, and continue. This is the default and
246    /// avoids collisions.
247    IncreaseLength,
248
249    /// Panics. This is a fail-fast option
250    /// for cases where you don't expect the codes to ever become exhausted, and
251    /// either creating a collision or increasing the length of the code would be
252    /// incorrect behavior.
253    Panic,
254}
255
256impl Default for ExhaustionStrategy {
257    fn default() -> Self {
258        ExhaustionStrategy::IncreaseLength
259    }
260}
261
262#[cfg(feature = "getrandom")]
263#[cfg(test)]
264mod tests {
265    use std::collections::HashSet;
266
267    use super::*;
268
269    #[test]
270    fn test_parallel_generators() {
271        let mut gen = ShortCodeGenerator::new_lowercase_alphanumeric(1);
272        let mut par_gens = gen.clone().into_partitioned_generators(7);
273
274        for _ in 0..10000 {
275            for par_gen in &mut par_gens {
276                let expected = gen.next_string();
277                let actual = par_gen.next_string();
278                assert_eq!(expected, actual);
279            }
280        }
281    }
282
283    #[test]
284    fn test_string_generator() {
285        assert_eq!(
286            3,
287            ShortCodeGenerator::with_alphabet("ABC".chars().collect(), 3)
288                .next_string()
289                .len()
290        );
291        assert_eq!(
292            5,
293            ShortCodeGenerator::with_alphabet("ABC".chars().collect(), 5)
294                .next_string()
295                .len()
296        );
297        assert_eq!(
298            10,
299            ShortCodeGenerator::with_alphabet("ABC".chars().collect(), 10)
300                .next_string()
301                .len()
302        );
303    }
304
305    #[test]
306    #[should_panic]
307    fn test_exhaustion_panic() {
308        let mut gen_repeat =
309            ShortCodeGenerator::new_numeric(2).exhaustion_strategy(ExhaustionStrategy::Panic);
310
311        for _ in 0..100 {
312            let result = gen_repeat.next_vec();
313            assert_eq!(2, result.len())
314        }
315
316        gen_repeat.next_vec();
317    }
318
319    #[test]
320    fn test_exhaustion_increase_length() {
321        let mut gen_repeat = ShortCodeGenerator::new_numeric(2);
322
323        for _ in 0..100 {
324            let result = gen_repeat.next_vec();
325            assert_eq!(2, result.len())
326        }
327
328        let result = gen_repeat.next_vec();
329
330        assert_eq!(3, result.len());
331    }
332
333    fn test_generator_helper(alphabet_size: u32, length: usize) {
334        let alphabet: Vec<u32> = (0..alphabet_size).into_iter().collect();
335        let permutations: u64 = (alphabet_size as u64).pow(length as u32);
336
337        let mut gen = ShortCodeGenerator::with_alphabet(alphabet, length)
338            .exhaustion_strategy(ExhaustionStrategy::Cycle);
339        let first = gen.next_vec();
340        let mut seen = HashSet::new();
341
342        for i in 0..permutations {
343            let next = gen.next_vec();
344
345            // Ensure we haven't seen this
346            assert!(!seen.contains(&next));
347
348            // If this is the last unique id generated, ensure that it equals the
349            // seed.
350            if i == permutations - 1 {
351                assert_eq!(first, next);
352            }
353
354            seen.insert(next);
355        }
356
357        // Ensure that we've seen every possible value. This is a test-of-a-test,
358        // because it should never fail (even with a faulty generator) if our other
359        // asserts pass, by the pigeonhole principle.
360        assert_eq!(permutations, seen.len() as u64);
361    }
362
363    #[test]
364    fn test_generator() {
365        test_generator_helper(3, 3);
366        test_generator_helper(7, 3);
367        test_generator_helper(4, 9);
368        test_generator_helper(26, 4);
369        test_generator_helper(44, 3);
370        test_generator_helper(7, 5);
371    }
372
373    #[test]
374    fn test_lcm() {
375        // Examples from wikipedia:
376        // https://en.wikipedia.org/wiki/Linear_congruential_generator#/media/File:Linear_congruential_generator_visualisation.svg
377
378        {
379            let mut lcm = LinearCongruentMultiplier::new(1, 9, 0, 2);
380
381            assert!(!lcm.exhausted());
382            assert_eq!(1, lcm.next());
383            assert!(!lcm.exhausted());
384            assert_eq!(2, lcm.next());
385            assert!(!lcm.exhausted());
386            assert_eq!(4, lcm.next());
387            assert!(!lcm.exhausted());
388            assert_eq!(8, lcm.next());
389            assert!(!lcm.exhausted());
390            assert_eq!(7, lcm.next());
391            assert!(!lcm.exhausted());
392            assert_eq!(5, lcm.next());
393            assert!(lcm.exhausted());
394            assert_eq!(1, lcm.next());
395            assert!(lcm.exhausted());
396        }
397
398        {
399            let mut lcm = LinearCongruentMultiplier::new(3, 9, 0, 2);
400
401            assert_eq!(3, lcm.next());
402            assert_eq!(6, lcm.next());
403            assert_eq!(3, lcm.next());
404        }
405
406        {
407            let mut lcm = LinearCongruentMultiplier::new(0, 9, 1, 4);
408
409            assert_eq!(0, lcm.next());
410            assert_eq!(1, lcm.next());
411            assert_eq!(5, lcm.next());
412            assert_eq!(3, lcm.next());
413            assert_eq!(4, lcm.next());
414            assert_eq!(8, lcm.next());
415            assert_eq!(6, lcm.next());
416            assert_eq!(7, lcm.next());
417            assert_eq!(2, lcm.next());
418            assert_eq!(0, lcm.next());
419        }
420    }
421
422    #[test]
423    fn test_0_1_3_stability() {
424        let mut gen: ShortCodeGenerator<char> = serde_json::from_str(r#"
425        {
426            "lcm": {
427                "first": 715,
428                "next": 715,
429                "m": 3125,
430                "c": 1,
431                "a": 6,
432                "exhausted": false
433            },
434            "offset": 1097,
435            "alphabet": ["a", "b", "c", "d"],
436            "length": 5,
437            "exhaustion_strategy": "Cycle"
438        }
439        "#).unwrap();
440
441        for _ in 0..100 {
442            gen.next_int();
443        }
444
445        assert_eq!("cddbc", gen.next_string());
446
447        for _ in 0..1000 {
448            gen.next_int();
449        }
450
451        assert_eq!("ccadc", gen.next_string());
452
453        for _ in 0..10000 {
454            gen.next_int();
455        }
456
457        assert_eq!("adacb", gen.next_string());
458    }
459
460    #[test]
461    fn test_0_1_4_stability() {
462        let mut gen: ShortCodeGenerator<char> = serde_json::from_str(r#"
463        {
464            "lcm": {
465              "first": 1,
466              "next": 1,
467              "m": 64,
468              "c": 1,
469              "a": 5,
470              "exhausted": false
471            },
472            "offset": 16,
473            "alphabet": [
474              "g",
475              "h",
476              "i",
477              "j"
478            ],
479            "length": 3,
480            "exhaustion_strategy": "IncreaseLength",
481            "rng": {
482              "seed": [
483                  50, 32, 156, 125, 71, 52, 54, 124, 10, 5, 100, 142, 252, 16, 120, 159, 27, 204,
484                  74, 211, 3, 75, 160, 85, 87, 13, 117, 73, 214, 197, 115, 217
485              ],
486              "stream": 0,
487              "word_pos": 6
488            },
489            "skip": null,
490            "used": false
491          }
492        "#).unwrap();
493
494        for _ in 0..100 {
495            gen.next_int();
496        }
497
498        assert_eq!("ghhh", gen.next_string());
499
500        for _ in 0..1000 {
501            gen.next_int();
502        }
503
504        assert_eq!("jhgij", gen.next_string());
505
506        for _ in 0..10000 {
507            gen.next_int();
508        }
509
510        assert_eq!("jhigggg", gen.next_string());
511    }
512}