herolib_sid/
base36.rs

1//! Base-36 encoding and decoding for SmartID
2//!
3//! SmartID uses base-36 encoding (0-9, a-z) for human-readable,
4//! compact identifiers.
5//!
6//! # Alphabet
7//!
8//! ```text
9//! 0123456789abcdefghijklmnopqrstuvwxyz
10//! ```
11//!
12//! # Properties
13//!
14//! - Lowercase only
15//! - ASCII safe
16//! - No punctuation
17//! - Visually compact
18//! - Widely understood
19
/// Digit table for base-36: index `i` holds the ASCII byte for digit value `i`
/// (`0`-`9` first, then lowercase `a`-`z`).
pub const ALPHABET: &[u8; 36] = b"0123456789abcdefghijklmnopqrstuvwxyz";

/// Radix used by SmartID encoding.
pub const BASE: u64 = 36;

/// Number of distinct values that fit in 4 base-36 digits (36^4 = 1,679,616).
pub const CAPACITY_4: u64 = BASE.pow(4);
/// Number of distinct values that fit in 5 base-36 digits (36^5 = 60,466,176).
pub const CAPACITY_5: u64 = BASE.pow(5);
/// Number of distinct values that fit in 6 base-36 digits (36^6 = 2,176,782,336).
pub const CAPACITY_6: u64 = BASE.pow(6);
30
31/// Encode a numeric value to base-36 string.
32///
33/// # Arguments
34///
35/// * `value` - The numeric value to encode
36///
37/// # Returns
38///
39/// A base-36 encoded string (lowercase, no padding)
40///
41/// # Examples
42///
43/// ```
44/// use herolib_osis::sid::base36::encode;
45///
46/// assert_eq!(encode(0), "0");
47/// assert_eq!(encode(35), "z");
48/// assert_eq!(encode(36), "10");
49/// assert_eq!(encode(1295), "zz");
50/// ```
51pub fn encode(value: u64) -> String {
52    if value == 0 {
53        return "0".to_string();
54    }
55
56    let mut result = Vec::new();
57    let mut n = value;
58
59    while n > 0 {
60        let remainder = (n % BASE) as usize;
61        result.push(ALPHABET[remainder] as char);
62        n /= BASE;
63    }
64
65    result.into_iter().rev().collect()
66}
67
68/// Encode a numeric value to base-36 string with padding.
69///
70/// # Arguments
71///
72/// * `value` - The numeric value to encode
73/// * `min_length` - Minimum length (left-padded with '0')
74///
75/// # Returns
76///
77/// A base-36 encoded string with at least `min_length` characters
78///
79/// # Examples
80///
81/// ```
82/// use herolib_osis::sid::base36::encode_padded;
83///
84/// assert_eq!(encode_padded(0, 4), "0000");
85/// assert_eq!(encode_padded(12, 4), "000c");
86/// assert_eq!(encode_padded(35, 4), "000z");
87/// assert_eq!(encode_padded(36, 4), "0010");
88/// ```
89pub fn encode_padded(value: u64, min_length: usize) -> String {
90    let encoded = encode(value);
91    if encoded.len() >= min_length {
92        encoded
93    } else {
94        let padding = "0".repeat(min_length - encoded.len());
95        format!("{}{}", padding, encoded)
96    }
97}
98
99/// Decode a base-36 string to numeric value.
100///
101/// # Arguments
102///
103/// * `s` - The base-36 encoded string
104///
105/// # Returns
106///
107/// The decoded numeric value, or an error if invalid
108///
109/// # Errors
110///
111/// Returns an error if the string contains characters not in the base-36 alphabet.
112///
113/// # Examples
114///
115/// ```
116/// use herolib_osis::sid::base36::decode;
117///
118/// assert_eq!(decode("0").unwrap(), 0);
119/// assert_eq!(decode("z").unwrap(), 35);
120/// assert_eq!(decode("10").unwrap(), 36);
121/// assert_eq!(decode("zz").unwrap(), 1295);
122/// assert_eq!(decode("0000").unwrap(), 0);
123/// assert_eq!(decode("000c").unwrap(), 12);
124/// ```
125pub fn decode(s: &str) -> Result<u64, DecodeError> {
126    if s.is_empty() {
127        return Err(DecodeError::EmptyString);
128    }
129
130    let mut result: u64 = 0;
131
132    for c in s.chars() {
133        let digit = match c {
134            '0'..='9' => (c as u64) - ('0' as u64),
135            'a'..='z' => (c as u64) - ('a' as u64) + 10,
136            'A'..='Z' => (c as u64) - ('A' as u64) + 10, // Accept uppercase for lenient parsing
137            _ => return Err(DecodeError::InvalidCharacter(c)),
138        };
139
140        result = result
141            .checked_mul(BASE)
142            .ok_or(DecodeError::Overflow)?
143            .checked_add(digit)
144            .ok_or(DecodeError::Overflow)?;
145    }
146
147    Ok(result)
148}
149
150/// Determine the minimum length needed for a given global_id.
151///
152/// # Arguments
153///
154/// * `global_id` - The numeric global ID
155///
156/// # Returns
157///
158/// The minimum character length (4, 5, or 6)
159///
160/// # Examples
161///
162/// ```
163/// use herolib_osis::sid::base36::required_length;
164///
165/// assert_eq!(required_length(0), 4);
166/// assert_eq!(required_length(1_679_615), 4);  // 36^4 - 1
167/// assert_eq!(required_length(1_679_616), 5);  // 36^4
168/// assert_eq!(required_length(60_466_175), 5); // 36^5 - 1
169/// assert_eq!(required_length(60_466_176), 6); // 36^5
170/// ```
171pub fn required_length(global_id: u64) -> usize {
172    if global_id < CAPACITY_4 {
173        4
174    } else if global_id < CAPACITY_5 {
175        5
176    } else {
177        6
178    }
179}
180
/// Report whether `s` has the shape of a SmartID.
///
/// # Arguments
///
/// * `s` - The candidate string
///
/// # Returns
///
/// `true` only when `s` is 4 to 6 bytes long and every byte is an ASCII digit
/// (`0`-`9`) or an ASCII lowercase letter (`a`-`z`). Uppercase letters,
/// punctuation and non-ASCII text are all rejected.
pub fn is_valid_sid(s: &str) -> bool {
    let bytes = s.as_bytes();

    // Any byte of a multi-byte UTF-8 sequence is >= 0x80 and therefore fails
    // the per-byte test, so checking bytes is equivalent to checking chars.
    matches!(bytes.len(), 4..=6)
        && bytes
            .iter()
            .all(|byte| matches!(byte, b'0'..=b'9' | b'a'..=b'z'))
}
198
/// Reasons a base-36 string can fail to decode.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DecodeError {
    /// Nothing to decode: the input had zero length.
    EmptyString,
    /// The input contained the given character, which is not a base-36 digit.
    InvalidCharacter(char),
    /// The input represents a number too large for a `u64`.
    Overflow,
}

impl std::fmt::Display for DecodeError {
    /// Write a short, lowercase, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::EmptyString => f.write_str("empty string"),
            Self::InvalidCharacter(ch) => write!(f, "invalid character: '{ch}'"),
            Self::Overflow => f.write_str("numeric overflow"),
        }
    }
}

// No underlying cause to expose, so the default `Error` methods suffice.
impl std::error::Error for DecodeError {}
221
/// Unit tests for the base-36 helpers: encoding, padding, decoding (including
/// every error variant), length tiers, SID validation and the capacity
/// constants.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_encode_basic() {
        assert_eq!(encode(0), "0");
        assert_eq!(encode(1), "1");
        assert_eq!(encode(9), "9");
        assert_eq!(encode(10), "a");
        assert_eq!(encode(35), "z");
        assert_eq!(encode(36), "10");
        assert_eq!(encode(37), "11");
    }

    #[test]
    fn test_encode_larger_values() {
        assert_eq!(encode(1295), "zz");
        assert_eq!(encode(1296), "100");
        assert_eq!(encode(46655), "zzz");
        assert_eq!(encode(46656), "1000");
    }

    #[test]
    fn test_encode_padded() {
        assert_eq!(encode_padded(0, 4), "0000");
        assert_eq!(encode_padded(1, 4), "0001");
        assert_eq!(encode_padded(12, 4), "000c");
        assert_eq!(encode_padded(35, 4), "000z");
        assert_eq!(encode_padded(36, 4), "0010");
        assert_eq!(encode_padded(1295, 4), "00zz");
        assert_eq!(encode_padded(46656, 4), "1000");
        assert_eq!(encode_padded(1679615, 4), "zzzz");
        assert_eq!(encode_padded(1679616, 5), "10000");
    }

    #[test]
    fn test_encode_padded_never_truncates() {
        // `min_length` is a lower bound only.
        assert_eq!(encode_padded(1679616, 4), "10000");
        assert_eq!(encode_padded(36, 0), "10");
        assert_eq!(encode_padded(0, 0), "0");
    }

    #[test]
    fn test_decode_basic() {
        assert_eq!(decode("0").unwrap(), 0);
        assert_eq!(decode("1").unwrap(), 1);
        assert_eq!(decode("9").unwrap(), 9);
        assert_eq!(decode("a").unwrap(), 10);
        assert_eq!(decode("z").unwrap(), 35);
        assert_eq!(decode("10").unwrap(), 36);
    }

    #[test]
    fn test_decode_with_padding() {
        assert_eq!(decode("0000").unwrap(), 0);
        assert_eq!(decode("0001").unwrap(), 1);
        assert_eq!(decode("000c").unwrap(), 12);
        assert_eq!(decode("00zz").unwrap(), 1295);
    }

    #[test]
    fn test_decode_accepts_uppercase() {
        assert_eq!(decode("A").unwrap(), 10);
        assert_eq!(decode("Z").unwrap(), 35);
        assert_eq!(decode("ZZZZ").unwrap(), 1679615);
    }

    #[test]
    fn test_decode_errors() {
        assert!(matches!(decode(""), Err(DecodeError::EmptyString)));
        assert!(matches!(decode("!"), Err(DecodeError::InvalidCharacter('!'))));
        assert!(matches!(decode("a-b"), Err(DecodeError::InvalidCharacter('-'))));
    }

    #[test]
    fn test_decode_rejects_non_ascii() {
        assert_eq!(
            decode("ab\u{e9}"),
            Err(DecodeError::InvalidCharacter('\u{e9}'))
        );
    }

    #[test]
    fn test_decode_overflow() {
        // u64::MAX takes 13 base-36 digits, but 13 'z's is 36^13 - 1, which
        // is larger than u64::MAX.
        assert_eq!(decode("zzzzzzzzzzzzz"), Err(DecodeError::Overflow));
        assert_eq!(decode(&"z".repeat(64)), Err(DecodeError::Overflow));

        // Leading zeros never contribute to overflow.
        let padded = format!("{}{}", "0".repeat(32), "zz");
        assert_eq!(decode(&padded), Ok(1295));
    }

    #[test]
    fn test_roundtrip() {
        for value in [0, 1, 35, 36, 1295, 1296, 46655, 46656, 1679615, 1679616] {
            let encoded = encode(value);
            let decoded = decode(&encoded).unwrap();
            assert_eq!(decoded, value, "roundtrip failed for {}", value);
        }
    }

    #[test]
    fn test_roundtrip_extremes() {
        let encoded = encode(u64::MAX);
        assert_eq!(encoded.len(), 13);
        assert_eq!(decode(&encoded), Ok(u64::MAX));

        let encoded = encode(u64::MAX - 1);
        assert_eq!(decode(&encoded), Ok(u64::MAX - 1));
    }

    #[test]
    fn test_required_length() {
        // 4 char range: 0 to 36^4 - 1 = 1,679,615
        assert_eq!(required_length(0), 4);
        assert_eq!(required_length(1_679_615), 4);

        // 5 char range: 36^4 to 36^5 - 1
        assert_eq!(required_length(1_679_616), 5);
        assert_eq!(required_length(60_466_175), 5);

        // 6 char range: 36^5 and above
        assert_eq!(required_length(60_466_176), 6);
        assert_eq!(required_length(2_176_782_335), 6);
    }

    #[test]
    fn test_required_length_produces_valid_sids() {
        // Padding an in-range id to its tier length always yields a valid SID.
        for id in [0, 1_679_615, 1_679_616, 60_466_175, 60_466_176, 2_176_782_335] {
            let sid = encode_padded(id, required_length(id));
            assert_eq!(sid.len(), required_length(id), "length mismatch for {}", id);
            assert!(is_valid_sid(&sid), "invalid sid {:?} for {}", sid, id);
        }
    }

    #[test]
    fn test_is_valid_sid() {
        // Valid SIDs
        assert!(is_valid_sid("0000"));
        assert!(is_valid_sid("abcd"));
        assert!(is_valid_sid("z9a0"));
        assert!(is_valid_sid("12345"));
        assert!(is_valid_sid("abcdef"));

        // Invalid: wrong length
        assert!(!is_valid_sid("abc"));
        assert!(!is_valid_sid("abcdefg"));

        // Invalid: uppercase
        assert!(!is_valid_sid("ABCD"));

        // Invalid: special characters
        assert!(!is_valid_sid("ab-d"));
        assert!(!is_valid_sid("ab_d"));
    }

    #[test]
    fn test_capacity_constants() {
        assert_eq!(CAPACITY_4, 1_679_616);
        assert_eq!(CAPACITY_5, 60_466_176);
        assert_eq!(CAPACITY_6, 2_176_782_336);
    }

    #[test]
    fn test_decode_error_display() {
        assert_eq!(DecodeError::EmptyString.to_string(), "empty string");
        assert_eq!(
            DecodeError::InvalidCharacter('-').to_string(),
            "invalid character: '-'"
        );
        assert_eq!(DecodeError::Overflow.to_string(), "numeric overflow");
    }
}