// mod_rand/charsets.rs
//! # Charset constants
//!
//! Byte-slice constants for the most common ASCII character sets used
//! by mod-rand's string-generation helpers (`gen_string`,
//! `random_string`, etc.). Each constant is a `&'static [u8]` of ASCII
//! bytes (every byte < 128); no allocation, no `String` wrappers.
//!
//! All constants here are available in `no_std`.
//!
//! ## Picking a charset
//!
//! - [`ALPHANUMERIC`] — the default for most random IDs (62 chars).
//! - [`ALPHA`] — letters only, mixed case (52 chars).
//! - [`ALPHA_LOWER`] / [`ALPHA_UPPER`] — when case must be uniform.
//! - [`NUMERIC`] — digits only (10 chars).
//! - [`HEX_LOWER`] / [`HEX_UPPER`] — for tokens that need only
//!   hex-safe characters (logging, color codes, raw byte tokens).
//! - [`URL_SAFE`] — RFC 4648 §5 URL-safe alphabet. Safe in URL paths
//!   and query strings without percent-encoding.
//! - [`BASE58`] — Bitcoin's alphabet. Omits visually-ambiguous
//!   characters (`0`, `O`, `I`, `l`).
//! - [`BASE64`] — RFC 4648 §4 standard. **Includes `+` and `/`**,
//!   which require percent-encoding in URLs; use [`URL_SAFE`] for
//!   URL contexts.
//!
//! ## Custom charsets
//!
//! Every `gen_string` / `random_string` entry point also accepts a
//! caller-supplied `&[u8]`. The caller MUST provide an ASCII charset
//! (every byte < 128); the string-generation helpers reject non-ASCII
//! charsets with a panic (Tier 1, Tier 2) or `io::Error` (Tier 3) so
//! the resulting `String` is never a malformed UTF-8 sequence.
//!
//! ## Example
//!
//! ```
//! use mod_rand::charsets;
//! use mod_rand::tier1::Xoshiro256;
//!
//! let mut rng = Xoshiro256::seed_from_u64(1);
//! let id = rng.gen_string(12, charsets::ALPHANUMERIC);
//! assert_eq!(id.len(), 12);
//! ```
42
/// All ASCII letters and digits — `A-Z`, `a-z`, `0-9`. 62 characters.
///
/// The most common default for random identifiers: roughly 5.95 bits
/// of entropy per character (log2 of 62, assuming each byte is drawn
/// uniformly), no special characters, case-sensitive.
///
/// Bytes are listed uppercase first, then lowercase, then digits.
// The literal is split with `\` line continuations (which also skip the
// next line's leading whitespace) so each range can be checked by eye;
// the resulting bytes are one contiguous 62-byte slice.
pub const ALPHANUMERIC: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
    abcdefghijklmnopqrstuvwxyz\
    0123456789";
48
/// ASCII letters only — `A-Z`, `a-z`. 52 characters.
///
/// Mixed-case and digit-free; about 5.70 bits of entropy per character
/// (log2 of 52) when each byte is drawn uniformly.
///
/// Bytes are listed uppercase first, then lowercase.
// `\` line continuations keep the two ranges visually separate without
// changing the bytes of the literal.
pub const ALPHA: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
    abcdefghijklmnopqrstuvwxyz";
51
/// Lowercase ASCII letters — `a-z`. 26 characters.
///
/// Use when the output must be uniformly lowercase; about 4.70 bits of
/// entropy per character (log2 of 26) when each byte is drawn
/// uniformly.
pub const ALPHA_LOWER: &[u8] = b"abcdefghijklmnopqrstuvwxyz";
54
/// Uppercase ASCII letters — `A-Z`. 26 characters.
///
/// Use when the output must be uniformly uppercase; about 4.70 bits of
/// entropy per character (log2 of 26) when each byte is drawn
/// uniformly.
pub const ALPHA_UPPER: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
57
/// ASCII digits — `0-9`. 10 characters.
///
/// About 3.32 bits of entropy per character (log2 of 10) when each
/// byte is drawn uniformly.
pub const NUMERIC: &[u8] = b"0123456789";
60
/// Lowercase hex digits — `0-9`, `a-f`. 16 characters.
///
/// Exactly 4 bits of entropy per character when each byte is drawn
/// uniformly. The byte at index `i` is the lowercase hex digit for the
/// value `i`.
pub const HEX_LOWER: &[u8] = b"0123456789abcdef";
63
/// Uppercase hex digits — `0-9`, `A-F`. 16 characters.
///
/// Exactly 4 bits of entropy per character when each byte is drawn
/// uniformly. The byte at index `i` is the uppercase hex digit for the
/// value `i`.
pub const HEX_UPPER: &[u8] = b"0123456789ABCDEF";
66
/// URL-safe base64 alphabet — RFC 4648 §5. `A-Z`, `a-z`, `0-9`, `-`,
/// `_`. 64 characters.
///
/// Safe to embed directly in URL paths and query strings without
/// percent-encoding. Exactly 6 bits of entropy per character when each
/// byte is drawn uniformly.
///
/// Bytes are listed uppercase, lowercase, digits, then `-` and `_`.
// `\` line continuations keep each range on its own line without
// changing the bytes of the literal.
pub const URL_SAFE: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
    abcdefghijklmnopqrstuvwxyz\
    0123456789\
    -_";
73
/// Base58 alphabet (Bitcoin / Flickr). Omits the visually-ambiguous
/// `0`, `O`, `I`, `l`. 58 characters.
///
/// About 5.86 bits of entropy per character (log2 of 58) when each
/// byte is drawn uniformly.
///
/// Bytes are listed digits first (`1-9`), then uppercase, then
/// lowercase — the Bitcoin ordering.
// `\` line continuations keep each range on its own line so the omitted
// characters are easy to spot; the bytes of the literal are unchanged.
pub const BASE58: &[u8] = b"123456789\
    ABCDEFGHJKLMNPQRSTUVWXYZ\
    abcdefghijkmnopqrstuvwxyz";
77
/// Standard base64 alphabet — RFC 4648 §4. `A-Z`, `a-z`, `0-9`, `+`,
/// `/`. 64 characters.
///
/// Note: `+` and `/` require percent-encoding in URL contexts; prefer
/// [`URL_SAFE`] when generating identifiers that go into URLs.
///
/// Bytes are listed uppercase, lowercase, digits, then `+` and `/`.
// `\` line continuations keep each range on its own line without
// changing the bytes of the literal.
pub const BASE64: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
    abcdefghijklmnopqrstuvwxyz\
    0123456789\
    +/";
84
#[cfg(test)]
mod tests {
    use super::*;

    /// Every public charset, paired with its name (used in failure
    /// messages) and the length its documentation states.
    ///
    /// This is the single table driving the length, ASCII and
    /// uniqueness tests: registering a new constant here is enough to
    /// have all three checks cover it.
    const CHARSETS: &[(&str, &[u8], usize)] = &[
        ("ALPHANUMERIC", ALPHANUMERIC, 62),
        ("ALPHA", ALPHA, 52),
        ("ALPHA_LOWER", ALPHA_LOWER, 26),
        ("ALPHA_UPPER", ALPHA_UPPER, 26),
        ("NUMERIC", NUMERIC, 10),
        ("HEX_LOWER", HEX_LOWER, 16),
        ("HEX_UPPER", HEX_UPPER, 16),
        ("URL_SAFE", URL_SAFE, 64),
        ("BASE58", BASE58, 58),
        ("BASE64", BASE64, 64),
    ];

    /// Each constant holds exactly as many bytes as its docs state.
    #[test]
    fn expected_lengths() {
        for &(name, cs, expected) in CHARSETS {
            assert_eq!(cs.len(), expected, "unexpected length for {name}");
        }
    }

    /// Every byte of every charset is ASCII (< 128).
    #[test]
    fn all_charsets_are_ascii() {
        for &(name, cs, _) in CHARSETS {
            assert!(cs.is_ascii(), "non-ASCII byte in charset {name}");
        }
    }

    /// `BASE58` contains none of the look-alike characters it is
    /// documented to omit.
    #[test]
    fn base58_omits_ambiguous() {
        for &b in BASE58 {
            assert!(
                !matches!(b, b'0' | b'O' | b'I' | b'l'),
                "base58 must omit ambiguous {}",
                b as char
            );
        }
    }

    /// No charset lists the same byte twice.
    #[test]
    fn no_duplicates_within_charset() {
        for &(name, cs, _) in CHARSETS {
            for (i, b) in cs.iter().enumerate() {
                // Only the tail needs scanning: earlier positions were
                // already compared against this one.
                assert!(
                    !cs[i + 1..].contains(b),
                    "duplicate byte {b} in charset {name}"
                );
            }
        }
    }
}