Skip to main content

id_forge/
nanoid.rs

1//! # NanoID generation
2//!
3//! URL-safe random strings of configurable length over a configurable
4//! alphabet. The default alphabet is the 64-character URL-safe set
5//! (`A-Z`, `a-z`, `0-9`, `_`, `-`) and the default length is 21 —
6//! the same defaults as the original JavaScript reference, giving
7//! roughly 1 trillion years to a 1 % collision probability at 1000
8//! IDs/second.
9//!
10//! ```
11//! use id_forge::nanoid;
12//!
13//! let id = nanoid::generate();
14//! assert_eq!(id.len(), 21);
15//!
16//! let short = nanoid::with_length(8);
17//! assert_eq!(short.len(), 8);
18//!
19//! let hex = nanoid::custom(16, b"0123456789abcdef");
20//! assert_eq!(hex.len(), 16);
21//! assert!(hex.chars().all(|c| "0123456789abcdef".contains(c)));
22//! ```
23//!
24//! ## Bias-free selection
25//!
26//! NanoID picks each character by drawing bits from the shared
27//! xoshiro256\*\* generator, masking to the smallest power of two
28//! that covers the alphabet, and rejecting indices that fall above
29//! the alphabet's size. For a 64-character alphabet the acceptance
30//! rate is 100 %; for a 17-character alphabet it's 17/32 = 53 %.
31//! Either way, every character of the alphabet has identical
32//! probability of being chosen.
33//!
34//! The 0.1.0 placeholder used a linear congruential generator with
35//! `byte % alphabet.len()`, which biased the result whenever the
36//! alphabet size was not a power of two. 0.9.3 fixes that.
37//!
38//! ## Randomness quality
39//!
40//! The bit source is the same fast non-cryptographic generator
41//! used by `uuid` and `ulid`. NanoIDs from this crate are suitable
42//! for collision-resistant identifiers, not session tokens.
43
44use core::fmt;
45
46use crate::rng;
47
48/// Default alphabet: URL-safe, 64 characters (`_-` + alphanumeric).
49pub const DEFAULT_ALPHABET: &[u8] =
50    b"_-0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
51
52/// Default length: 21 characters.
53pub const DEFAULT_LENGTH: usize = 21;
54
55/// Generate a NanoID using the default alphabet and length.
56///
57/// # Example
58///
59/// ```
60/// use id_forge::nanoid;
61///
62/// let id = nanoid::generate();
63/// assert_eq!(id.len(), 21);
64/// ```
65pub fn generate() -> String {
66    custom(DEFAULT_LENGTH, DEFAULT_ALPHABET)
67}
68
69/// Generate a NanoID of the given length using the default alphabet.
70///
71/// # Example
72///
73/// ```
74/// use id_forge::nanoid;
75///
76/// let id = nanoid::with_length(10);
77/// assert_eq!(id.len(), 10);
78/// ```
79pub fn with_length(length: usize) -> String {
80    custom(length, DEFAULT_ALPHABET)
81}
82
83/// Generate a NanoID with a custom length and alphabet.
84///
85/// This entry point is permissive: an empty alphabet returns the
86/// empty string, and duplicate bytes in the alphabet are tolerated
87/// (the repeated characters appear with higher probability). Use
88/// [`try_custom`] when you want validation to surface those cases as
89/// an error.
90///
91/// # Example
92///
93/// ```
94/// use id_forge::nanoid;
95///
96/// let id = nanoid::custom(8, b"0123456789ABCDEF");
97/// assert_eq!(id.len(), 8);
98/// assert!(id.chars().all(|c| "0123456789ABCDEF".contains(c)));
99/// ```
100pub fn custom(length: usize, alphabet: &[u8]) -> String {
101    if alphabet.is_empty() || length == 0 {
102        return String::new();
103    }
104    custom_unchecked(length, alphabet)
105}
106
107/// Strict counterpart to [`custom`]: validates the alphabet first.
108///
109/// Returns an error when the alphabet is empty or contains a
110/// duplicate byte. A duplicate would otherwise silently skew the
111/// character distribution.
112///
113/// # Example
114///
115/// ```
116/// use id_forge::nanoid::{self, AlphabetError};
117///
118/// let id = nanoid::try_custom(12, b"abcdef").unwrap();
119/// assert_eq!(id.len(), 12);
120///
121/// assert_eq!(nanoid::try_custom(8, b""), Err(AlphabetError::Empty));
122/// assert!(matches!(
123///     nanoid::try_custom(8, b"aab"),
124///     Err(AlphabetError::Duplicate(b'a'))
125/// ));
126/// ```
127pub fn try_custom(length: usize, alphabet: &[u8]) -> Result<String, AlphabetError> {
128    validate_alphabet(alphabet)?;
129    if length == 0 {
130        return Ok(String::new());
131    }
132    Ok(custom_unchecked(length, alphabet))
133}
134
135/// Verify that an alphabet is non-empty and free of duplicate bytes.
136///
137/// Useful for callers who want to vet a configuration value at
138/// startup once instead of paying the validation cost on every
139/// [`try_custom`] call.
140///
141/// # Example
142///
143/// ```
144/// use id_forge::nanoid::{self, AlphabetError};
145///
146/// assert!(nanoid::validate_alphabet(b"01234567").is_ok());
147/// assert_eq!(nanoid::validate_alphabet(b""), Err(AlphabetError::Empty));
148/// assert_eq!(
149///     nanoid::validate_alphabet(b"aba"),
150///     Err(AlphabetError::Duplicate(b'a'))
151/// );
152/// ```
153pub fn validate_alphabet(alphabet: &[u8]) -> Result<(), AlphabetError> {
154    if alphabet.is_empty() {
155        return Err(AlphabetError::Empty);
156    }
157    let mut seen = [false; 256];
158    for &b in alphabet {
159        if seen[b as usize] {
160            return Err(AlphabetError::Duplicate(b));
161        }
162        seen[b as usize] = true;
163    }
164    Ok(())
165}
166
167/// Error returned by [`try_custom`] and [`validate_alphabet`].
168#[derive(Debug, Clone, Copy, PartialEq, Eq)]
169pub enum AlphabetError {
170    /// The alphabet was the empty slice.
171    Empty,
172    /// The given byte appears more than once. Duplicates would skew
173    /// the character distribution silently.
174    Duplicate(u8),
175}
176
177impl fmt::Display for AlphabetError {
178    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
179        match self {
180            Self::Empty => f.write_str("nanoid: alphabet must be non-empty"),
181            Self::Duplicate(b) => {
182                write!(f, "nanoid: duplicate byte 0x{b:02x} in alphabet")
183            }
184        }
185    }
186}
187
188impl std::error::Error for AlphabetError {}
189
190fn custom_unchecked(length: usize, alphabet: &[u8]) -> String {
191    let n = alphabet.len();
192    if n == 1 {
193        // Degenerate: every character is the same.
194        return core::iter::repeat(alphabet[0] as char)
195            .take(length)
196            .collect();
197    }
198    let bits = mask_bits(n);
199    let mask: u64 = (1u64 << bits) - 1;
200
201    let mut out = String::with_capacity(length);
202    let mut buffer: u64 = 0;
203    let mut buffer_bits: u32 = 0;
204    let mut placed = 0;
205
206    while placed < length {
207        if buffer_bits < bits {
208            buffer = rng::next_u64();
209            buffer_bits = 64;
210        }
211        let idx = (buffer & mask) as usize;
212        buffer >>= bits;
213        buffer_bits -= bits;
214        if idx < n {
215            out.push(alphabet[idx] as char);
216            placed += 1;
217        }
218    }
219    out
220}
221
222/// Bits needed to address any index in `0..n` — the smallest `k`
223/// such that `2^k >= n`. For `n = 64` this is 6; for `n = 65` it's 7.
224#[inline]
225const fn mask_bits(n: usize) -> u32 {
226    // n is guaranteed > 1 here.
227    usize::BITS - (n - 1).leading_zeros()
228}
229
230#[cfg(test)]
231mod tests {
232    use super::*;
233    use std::collections::HashSet;
234
235    #[test]
236    fn default_length() {
237        assert_eq!(generate().len(), 21);
238    }
239
240    #[test]
241    fn with_length_correct() {
242        assert_eq!(with_length(10).len(), 10);
243    }
244
245    #[test]
246    fn custom_alphabet_respected() {
247        let id = custom(64, b"01");
248        assert!(id.chars().all(|c| c == '0' || c == '1'));
249        assert_eq!(id.len(), 64);
250    }
251
252    #[test]
253    fn unique_ids() {
254        assert_ne!(generate(), generate());
255    }
256
257    #[test]
258    fn many_default_unique() {
259        let mut set = HashSet::new();
260        for _ in 0..10_000 {
261            assert!(set.insert(generate()));
262        }
263    }
264
265    #[test]
266    fn empty_alphabet_returns_empty() {
267        assert_eq!(custom(10, &[]), "");
268    }
269
270    #[test]
271    fn zero_length_returns_empty() {
272        assert_eq!(custom(0, DEFAULT_ALPHABET), "");
273        assert_eq!(with_length(0), "");
274    }
275
276    #[test]
277    fn single_char_alphabet() {
278        let id = custom(8, b"x");
279        assert_eq!(id, "xxxxxxxx");
280    }
281
282    #[test]
283    fn try_custom_rejects_empty() {
284        assert_eq!(try_custom(8, b""), Err(AlphabetError::Empty));
285    }
286
287    #[test]
288    fn try_custom_rejects_duplicate() {
289        let err = try_custom(8, b"abcda").unwrap_err();
290        assert_eq!(err, AlphabetError::Duplicate(b'a'));
291    }
292
293    #[test]
294    fn try_custom_accepts_valid() {
295        let id = try_custom(12, b"abcdef0123").unwrap();
296        assert_eq!(id.len(), 12);
297        assert!(id.chars().all(|c| "abcdef0123".contains(c)));
298    }
299
300    #[test]
301    fn validate_alphabet_paths() {
302        assert!(validate_alphabet(DEFAULT_ALPHABET).is_ok());
303        assert_eq!(validate_alphabet(b""), Err(AlphabetError::Empty));
304        assert_eq!(
305            validate_alphabet(b"abca"),
306            Err(AlphabetError::Duplicate(b'a'))
307        );
308    }
309
310    #[test]
311    fn non_power_of_two_alphabet_unbiased() {
312        // 17-char alphabet: rejection sampling must keep distribution
313        // uniform. Over a large sample no single char dominates.
314        let alphabet: &[u8] = b"ABCDEFGHIJKLMNOPQ"; // 17 chars
315        let id = custom(170_000, alphabet);
316        let mut counts = [0usize; 17];
317        for c in id.bytes() {
318            let i = alphabet.iter().position(|&b| b == c).unwrap();
319            counts[i] += 1;
320        }
321        // Expected ~10 000 per char; allow 12 % skew.
322        for (i, &n) in counts.iter().enumerate() {
323            assert!(
324                (8_800..=11_200).contains(&n),
325                "alphabet[{i}] ({}) count {n} outside expected band",
326                alphabet[i] as char
327            );
328        }
329    }
330
331    #[test]
332    fn mask_bits_known_values() {
333        assert_eq!(mask_bits(2), 1);
334        assert_eq!(mask_bits(8), 3);
335        assert_eq!(mask_bits(64), 6);
336        assert_eq!(mask_bits(65), 7);
337        assert_eq!(mask_bits(256), 8);
338    }
339
340    #[test]
341    fn length_exact_across_alphabet_sizes() {
342        // ASCII-printable alphabets at increasing sizes to exercise
343        // every mask-bits path (1 .. 8). Output char count must equal
344        // the requested length even when acceptance rate is below 100 %.
345        let printable: Vec<u8> = (b'!'..=b'~').collect(); // 94 unique ASCII bytes
346        for n in [2usize, 7, 16, 33, 64, 65, 93, 94] {
347            let alphabet = &printable[..n];
348            let id = custom(50, alphabet);
349            assert_eq!(id.chars().count(), 50, "size {n}");
350            assert_eq!(id.len(), 50, "size {n}: ASCII alphabet so bytes==chars");
351        }
352    }
353
354    #[test]
355    fn non_ascii_alphabet_counts_chars_not_bytes() {
356        // 200 unique bytes including non-ASCII. Output must have
357        // `length` characters even though each char may encode to
358        // multiple UTF-8 bytes in the returned String.
359        let printable: Vec<u8> = (0..=255).collect();
360        let id = custom(30, &printable);
361        assert_eq!(id.chars().count(), 30);
362    }
363
364    #[test]
365    fn default_alphabet_has_no_duplicates() {
366        assert!(validate_alphabet(DEFAULT_ALPHABET).is_ok());
367        assert_eq!(DEFAULT_ALPHABET.len(), 64);
368    }
369}