mod_rand/
charsets.rs

1//! # Charset constants
2//!
3//! Byte-slice constants for the most common ASCII character sets used
4//! by mod-rand's string-generation helpers (`gen_string`,
5//! `random_string`, etc.). Each constant is a `&'static [u8]` of ASCII
6//! bytes (every byte < 128); no allocation, no `String` wrappers.
7//!
8//! All constants here are available in `no_std`.
9//!
10//! ## Picking a charset
11//!
12//! - [`ALPHANUMERIC`] — the default for most random IDs (62 chars).
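//! - [`ALPHA`] — letters only, mixed case (52 chars).
//! - [`ALPHA_LOWER`] / [`ALPHA_UPPER`] — when case must be uniform.
//! - [`NUMERIC`] — decimal digits only (10 chars).
//! - [`HEX_LOWER`] / [`HEX_UPPER`] — for tokens that must consist
//!   solely of hexadecimal digits (logging, color codes, raw byte tokens).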
16//! - [`URL_SAFE`] — RFC 4648 §5 URL-safe alphabet. Safe in URL paths
17//!   and query strings without percent-encoding.
18//! - [`BASE58`] — Bitcoin's alphabet. Omits visually-ambiguous
19//!   characters (`0`, `O`, `I`, `l`).
20//! - [`BASE64`] — RFC 4648 §4 standard. **Includes `+` and `/`**,
21//!   which require percent-encoding in URLs; use [`URL_SAFE`] for
22//!   URL contexts.
23//!
24//! ## Custom charsets
25//!
26//! Every `gen_string` / `random_string` entry point also accepts a
27//! caller-supplied `&[u8]`. The caller MUST provide an ASCII charset
28//! (every byte < 128); the string-generation helpers reject non-ASCII
29//! charsets with a panic (Tier 1, Tier 2) or `io::Error` (Tier 3) so
30//! the resulting `String` is never a malformed UTF-8 sequence.
31//!
32//! ## Example
33//!
34//! ```
35//! use mod_rand::charsets;
36//! use mod_rand::tier1::Xoshiro256;
37//!
38//! let mut rng = Xoshiro256::seed_from_u64(1);
39//! let id = rng.gen_string(12, charsets::ALPHANUMERIC);
40//! assert_eq!(id.len(), 12);
41//! ```
42
/// The 62 ASCII letters and digits, ordered `A-Z`, then `a-z`, then `0-9`.
///
/// A sensible default alphabet for random identifiers: it contains no
/// punctuation, distinguishes upper from lower case, and yields roughly
/// 5.95 bits of entropy per character.
pub const ALPHANUMERIC: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
48
/// The 52 ASCII letters with no digits: uppercase `A-Z` followed by
/// lowercase `a-z`.
pub const ALPHA: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
51
/// The 26 lowercase ASCII letters, `a` through `z`, in alphabetical order.
pub const ALPHA_LOWER: &[u8] = b"abcdefghijklmnopqrstuvwxyz";
54
/// The 26 uppercase ASCII letters, `A` through `Z`, in alphabetical order.
pub const ALPHA_UPPER: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
57
/// The 10 ASCII decimal digits, `0` through `9`, in ascending order.
pub const NUMERIC: &[u8] = b"0123456789";
60
/// The 16 hexadecimal digits using lowercase letters: `0-9` followed by
/// `a-f`.
pub const HEX_LOWER: &[u8] = b"0123456789abcdef";
63
/// The 16 hexadecimal digits using uppercase letters: `0-9` followed by
/// `A-F`.
pub const HEX_UPPER: &[u8] = b"0123456789ABCDEF";
66
/// The 64-character URL-safe base64 alphabet from RFC 4648 §5: `A-Z`,
/// `a-z`, `0-9`, then `-` and `_`.
///
/// None of these characters needs percent-encoding, so generated strings
/// can be placed directly into URL paths and query strings.
pub const URL_SAFE: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
73
/// The 58-character Base58 alphabet (Bitcoin / Flickr): digits `1-9`,
/// uppercase letters without `I` and `O`, then lowercase letters without
/// `l`.
///
/// The four visually-ambiguous characters `0`, `O`, `I` and `l` are left
/// out so that generated strings are easier to read and transcribe.
pub const BASE58: &[u8] = b"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
77
/// The 64-character standard base64 alphabet from RFC 4648 §4: `A-Z`,
/// `a-z`, `0-9`, then `+` and `/`.
///
/// Note: `+` and `/` must be percent-encoded inside URLs. For identifiers
/// that end up in URLs, reach for [`URL_SAFE`] instead.
pub const BASE64: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
84
#[cfg(test)]
mod tests {
    use super::*;

    /// Every charset exported by this module, as
    /// `(name, bytes, documented length)`.
    ///
    /// All table-driven tests below iterate this single list, so a charset
    /// added here is covered by every check at once, and a failing
    /// assertion can name the charset it concerns.
    const CHARSETS: [(&str, &[u8], usize); 10] = [
        ("ALPHANUMERIC", ALPHANUMERIC, 62),
        ("ALPHA", ALPHA, 52),
        ("ALPHA_LOWER", ALPHA_LOWER, 26),
        ("ALPHA_UPPER", ALPHA_UPPER, 26),
        ("NUMERIC", NUMERIC, 10),
        ("HEX_LOWER", HEX_LOWER, 16),
        ("HEX_UPPER", HEX_UPPER, 16),
        ("URL_SAFE", URL_SAFE, 64),
        ("BASE58", BASE58, 58),
        ("BASE64", BASE64, 64),
    ];

    /// Each charset contains exactly as many bytes as its docs state.
    #[test]
    fn expected_lengths() {
        for (name, cs, expected) in CHARSETS {
            assert_eq!(cs.len(), expected, "unexpected length for charset {name}");
        }
    }

    /// Every byte of every charset is ASCII (< 128).
    #[test]
    fn all_charsets_are_ascii() {
        for (name, cs, _) in CHARSETS {
            assert!(cs.is_ascii(), "non-ASCII byte in charset {name}");
        }
    }

    /// `BASE58` contains none of the look-alike characters `0`, `O`, `I`, `l`.
    #[test]
    fn base58_omits_ambiguous() {
        for &b in BASE58 {
            let c = char::from(b);
            assert!(
                !matches!(c, '0' | 'O' | 'I' | 'l'),
                "base58 must omit ambiguous {c}"
            );
        }
    }

    /// No charset lists the same byte twice.
    #[test]
    fn no_duplicates_within_charset() {
        for (name, cs, _) in CHARSETS {
            // One flag per possible byte value; sized for the full `u8`
            // range so a stray non-ASCII byte cannot index out of bounds.
            let mut seen = [false; 256];
            for &b in cs {
                let slot = &mut seen[usize::from(b)];
                assert!(!*slot, "duplicate byte {b} in charset {name}");
                *slot = true;
            }
        }
    }
}
mod_rand/charsets.rs

mod_rand/
charsets.rs