id_forge/nanoid.rs
1//! # NanoID generation
2//!
3//! URL-safe random strings of configurable length over a configurable
4//! alphabet. The default alphabet is the 64-character URL-safe set
5//! (`A-Z`, `a-z`, `0-9`, `_`, `-`) and the default length is 21 —
6//! the same defaults as the original JavaScript reference, giving
7//! roughly 1 trillion years to a 1 % collision probability at 1000
8//! IDs/second.
9//!
10//! ```
11//! use id_forge::nanoid;
12//!
13//! let id = nanoid::generate();
14//! assert_eq!(id.len(), 21);
15//!
16//! let short = nanoid::with_length(8);
17//! assert_eq!(short.len(), 8);
18//!
19//! let hex = nanoid::custom(16, b"0123456789abcdef");
20//! assert_eq!(hex.len(), 16);
21//! assert!(hex.chars().all(|c| "0123456789abcdef".contains(c)));
22//! ```
23//!
24//! ## Bias-free selection
25//!
26//! NanoID picks each character by drawing bits from the shared
27//! xoshiro256\*\* generator, masking to the smallest power of two
28//! that covers the alphabet, and rejecting indices that fall above
29//! the alphabet's size. For a 64-character alphabet the acceptance
30//! rate is 100 %; for a 17-character alphabet it's 17/32 = 53 %.
31//! Either way, every character of the alphabet has identical
32//! probability of being chosen.
33//!
34//! The 0.1.0 placeholder used a linear congruential generator with
35//! `byte % alphabet.len()`, which biased the result whenever the
36//! alphabet size was not a power of two. 0.9.3 fixes that.
37//!
38//! ## Randomness quality
39//!
40//! The bit source is the same fast non-cryptographic generator
41//! used by `uuid` and `ulid`. NanoIDs from this crate are suitable
42//! for collision-resistant identifiers, not session tokens.
43
44use core::fmt;
45
46use crate::rng;
47
48/// Default alphabet: URL-safe, 64 characters (`_-` + alphanumeric).
49pub const DEFAULT_ALPHABET: &[u8] =
50 b"_-0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
51
52/// Default length: 21 characters.
53pub const DEFAULT_LENGTH: usize = 21;
54
55/// Generate a NanoID using the default alphabet and length.
56///
57/// # Example
58///
59/// ```
60/// use id_forge::nanoid;
61///
62/// let id = nanoid::generate();
63/// assert_eq!(id.len(), 21);
64/// ```
65pub fn generate() -> String {
66 custom(DEFAULT_LENGTH, DEFAULT_ALPHABET)
67}
68
69/// Generate a NanoID of the given length using the default alphabet.
70///
71/// # Example
72///
73/// ```
74/// use id_forge::nanoid;
75///
76/// let id = nanoid::with_length(10);
77/// assert_eq!(id.len(), 10);
78/// ```
79pub fn with_length(length: usize) -> String {
80 custom(length, DEFAULT_ALPHABET)
81}
82
83/// Generate a NanoID with a custom length and alphabet.
84///
85/// This entry point is permissive: an empty alphabet returns the
86/// empty string, and duplicate bytes in the alphabet are tolerated
87/// (the repeated characters appear with higher probability). Use
88/// [`try_custom`] when you want validation to surface those cases as
89/// an error.
90///
91/// # Example
92///
93/// ```
94/// use id_forge::nanoid;
95///
96/// let id = nanoid::custom(8, b"0123456789ABCDEF");
97/// assert_eq!(id.len(), 8);
98/// assert!(id.chars().all(|c| "0123456789ABCDEF".contains(c)));
99/// ```
100pub fn custom(length: usize, alphabet: &[u8]) -> String {
101 if alphabet.is_empty() || length == 0 {
102 return String::new();
103 }
104 custom_unchecked(length, alphabet)
105}
106
107/// Strict counterpart to [`custom`]: validates the alphabet first.
108///
109/// Returns an error when the alphabet is empty or contains a
110/// duplicate byte. A duplicate would otherwise silently skew the
111/// character distribution.
112///
113/// # Example
114///
115/// ```
116/// use id_forge::nanoid::{self, AlphabetError};
117///
118/// let id = nanoid::try_custom(12, b"abcdef").unwrap();
119/// assert_eq!(id.len(), 12);
120///
121/// assert_eq!(nanoid::try_custom(8, b""), Err(AlphabetError::Empty));
122/// assert!(matches!(
123/// nanoid::try_custom(8, b"aab"),
124/// Err(AlphabetError::Duplicate(b'a'))
125/// ));
126/// ```
127pub fn try_custom(length: usize, alphabet: &[u8]) -> Result<String, AlphabetError> {
128 validate_alphabet(alphabet)?;
129 if length == 0 {
130 return Ok(String::new());
131 }
132 Ok(custom_unchecked(length, alphabet))
133}
134
135/// Verify that an alphabet is non-empty and free of duplicate bytes.
136///
137/// Useful for callers who want to vet a configuration value at
138/// startup once instead of paying the validation cost on every
139/// [`try_custom`] call.
140///
141/// # Example
142///
143/// ```
144/// use id_forge::nanoid::{self, AlphabetError};
145///
146/// assert!(nanoid::validate_alphabet(b"01234567").is_ok());
147/// assert_eq!(nanoid::validate_alphabet(b""), Err(AlphabetError::Empty));
148/// assert_eq!(
149/// nanoid::validate_alphabet(b"aba"),
150/// Err(AlphabetError::Duplicate(b'a'))
151/// );
152/// ```
153pub fn validate_alphabet(alphabet: &[u8]) -> Result<(), AlphabetError> {
154 if alphabet.is_empty() {
155 return Err(AlphabetError::Empty);
156 }
157 let mut seen = [false; 256];
158 for &b in alphabet {
159 if seen[b as usize] {
160 return Err(AlphabetError::Duplicate(b));
161 }
162 seen[b as usize] = true;
163 }
164 Ok(())
165}
166
167/// Error returned by [`try_custom`] and [`validate_alphabet`].
168#[derive(Debug, Clone, Copy, PartialEq, Eq)]
169pub enum AlphabetError {
170 /// The alphabet was the empty slice.
171 Empty,
172 /// The given byte appears more than once. Duplicates would skew
173 /// the character distribution silently.
174 Duplicate(u8),
175}
176
177impl fmt::Display for AlphabetError {
178 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
179 match self {
180 Self::Empty => f.write_str("nanoid: alphabet must be non-empty"),
181 Self::Duplicate(b) => {
182 write!(f, "nanoid: duplicate byte 0x{b:02x} in alphabet")
183 }
184 }
185 }
186}
187
188impl std::error::Error for AlphabetError {}
189
190fn custom_unchecked(length: usize, alphabet: &[u8]) -> String {
191 let n = alphabet.len();
192 if n == 1 {
193 // Degenerate: every character is the same.
194 return core::iter::repeat(alphabet[0] as char)
195 .take(length)
196 .collect();
197 }
198 let bits = mask_bits(n);
199 let mask: u64 = (1u64 << bits) - 1;
200
201 let mut out = String::with_capacity(length);
202 let mut buffer: u64 = 0;
203 let mut buffer_bits: u32 = 0;
204 let mut placed = 0;
205
206 while placed < length {
207 if buffer_bits < bits {
208 buffer = rng::next_u64();
209 buffer_bits = 64;
210 }
211 let idx = (buffer & mask) as usize;
212 buffer >>= bits;
213 buffer_bits -= bits;
214 if idx < n {
215 out.push(alphabet[idx] as char);
216 placed += 1;
217 }
218 }
219 out
220}
221
222/// Bits needed to address any index in `0..n` — the smallest `k`
223/// such that `2^k >= n`. For `n = 64` this is 6; for `n = 65` it's 7.
224#[inline]
225const fn mask_bits(n: usize) -> u32 {
226 // n is guaranteed > 1 here.
227 usize::BITS - (n - 1).leading_zeros()
228}
229
230#[cfg(test)]
231mod tests {
232 use super::*;
233 use std::collections::HashSet;
234
235 #[test]
236 fn default_length() {
237 assert_eq!(generate().len(), 21);
238 }
239
240 #[test]
241 fn with_length_correct() {
242 assert_eq!(with_length(10).len(), 10);
243 }
244
245 #[test]
246 fn custom_alphabet_respected() {
247 let id = custom(64, b"01");
248 assert!(id.chars().all(|c| c == '0' || c == '1'));
249 assert_eq!(id.len(), 64);
250 }
251
252 #[test]
253 fn unique_ids() {
254 assert_ne!(generate(), generate());
255 }
256
257 #[test]
258 fn many_default_unique() {
259 let mut set = HashSet::new();
260 for _ in 0..10_000 {
261 assert!(set.insert(generate()));
262 }
263 }
264
265 #[test]
266 fn empty_alphabet_returns_empty() {
267 assert_eq!(custom(10, &[]), "");
268 }
269
270 #[test]
271 fn zero_length_returns_empty() {
272 assert_eq!(custom(0, DEFAULT_ALPHABET), "");
273 assert_eq!(with_length(0), "");
274 }
275
276 #[test]
277 fn single_char_alphabet() {
278 let id = custom(8, b"x");
279 assert_eq!(id, "xxxxxxxx");
280 }
281
282 #[test]
283 fn try_custom_rejects_empty() {
284 assert_eq!(try_custom(8, b""), Err(AlphabetError::Empty));
285 }
286
287 #[test]
288 fn try_custom_rejects_duplicate() {
289 let err = try_custom(8, b"abcda").unwrap_err();
290 assert_eq!(err, AlphabetError::Duplicate(b'a'));
291 }
292
293 #[test]
294 fn try_custom_accepts_valid() {
295 let id = try_custom(12, b"abcdef0123").unwrap();
296 assert_eq!(id.len(), 12);
297 assert!(id.chars().all(|c| "abcdef0123".contains(c)));
298 }
299
300 #[test]
301 fn validate_alphabet_paths() {
302 assert!(validate_alphabet(DEFAULT_ALPHABET).is_ok());
303 assert_eq!(validate_alphabet(b""), Err(AlphabetError::Empty));
304 assert_eq!(
305 validate_alphabet(b"abca"),
306 Err(AlphabetError::Duplicate(b'a'))
307 );
308 }
309
310 #[test]
311 fn non_power_of_two_alphabet_unbiased() {
312 // 17-char alphabet: rejection sampling must keep distribution
313 // uniform. Over a large sample no single char dominates.
314 let alphabet: &[u8] = b"ABCDEFGHIJKLMNOPQ"; // 17 chars
315 let id = custom(170_000, alphabet);
316 let mut counts = [0usize; 17];
317 for c in id.bytes() {
318 let i = alphabet.iter().position(|&b| b == c).unwrap();
319 counts[i] += 1;
320 }
321 // Expected ~10 000 per char; allow 12 % skew.
322 for (i, &n) in counts.iter().enumerate() {
323 assert!(
324 (8_800..=11_200).contains(&n),
325 "alphabet[{i}] ({}) count {n} outside expected band",
326 alphabet[i] as char
327 );
328 }
329 }
330
331 #[test]
332 fn mask_bits_known_values() {
333 assert_eq!(mask_bits(2), 1);
334 assert_eq!(mask_bits(8), 3);
335 assert_eq!(mask_bits(64), 6);
336 assert_eq!(mask_bits(65), 7);
337 assert_eq!(mask_bits(256), 8);
338 }
339
340 #[test]
341 fn length_exact_across_alphabet_sizes() {
342 // ASCII-printable alphabets at increasing sizes to exercise
343 // every mask-bits path (1 .. 8). Output char count must equal
344 // the requested length even when acceptance rate is below 100 %.
345 let printable: Vec<u8> = (b'!'..=b'~').collect(); // 94 unique ASCII bytes
346 for n in [2usize, 7, 16, 33, 64, 65, 93, 94] {
347 let alphabet = &printable[..n];
348 let id = custom(50, alphabet);
349 assert_eq!(id.chars().count(), 50, "size {n}");
350 assert_eq!(id.len(), 50, "size {n}: ASCII alphabet so bytes==chars");
351 }
352 }
353
354 #[test]
355 fn non_ascii_alphabet_counts_chars_not_bytes() {
356 // 200 unique bytes including non-ASCII. Output must have
357 // `length` characters even though each char may encode to
358 // multiple UTF-8 bytes in the returned String.
359 let printable: Vec<u8> = (0..=255).collect();
360 let id = custom(30, &printable);
361 assert_eq!(id.chars().count(), 30);
362 }
363
364 #[test]
365 fn default_alphabet_has_no_duplicates() {
366 assert!(validate_alphabet(DEFAULT_ALPHABET).is_ok());
367 assert_eq!(DEFAULT_ALPHABET.len(), 64);
368 }
369}