1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
//! Encodes integers as CSS-valid selector names, using the characters of a
//! caller-supplied alphabet as digits.
use lazy_static::lazy_static;
use onig::*;
lazy_static! {
// Characters that an encoded selector name must not begin with:
// the ASCII digits, the hyphen and the underscore.
// `into_alphabet_set` records the positions of these characters in the
// sanitised alphabet so that `to_radix` can avoid them in the leading place.
static ref INVALID_FIRST_CHARACTER: Vec<char> = vec![
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_',
];
// Matches any single character that is stripped from an alphabet before use.
// The character class below is written with octal escapes.
//
// Invalid characters in a selector name are:
// - \0-\54: null to comma
// - \56: period (.)
// - \57: slash (/)
// - \72-\100: colon (:) to at (@)
// - \133-\136: left square bracket ([) to caret (^)
// - \140: backtick (`)
// - \173-\177: left brace ({) to delete
//
// Note what the ranges leave out: \55 (hyphen), \60-\71 (digits),
// \101-\132 (A-Z), \137 (underscore) and \141-\172 (a-z) are not matched,
// and neither is anything above \177.
//
// The pattern is a fixed literal; `unwrap()` would only panic if that literal
// failed to compile.
static ref INVALID_CHARACTERS: Regex = Regex::new(
r##"(?x)
[\0-\54\56\57\72-\100\133-\136\140\173-\177]
"##
).unwrap();
}
/// Converts an ordinal into an encoded radix.
///
/// Names are handed out in order of increasing length: the lowest ordinals
/// map to single-character names, the next ones to two-character names, and
/// so on. The first character of a name is never taken from one of the
/// positions listed in `alphabet.1`; every other place may use any character
/// of `alphabet.0`.
///
/// Function parameters:
/// - `ordinal` — Integer to encode, must be a non-negative number.
/// - `alphabet` — A tuple of the alphabet characters and the (ascending)
///   positions of the characters that may not start a name. Please use
///   [`into_alphabet_set()`] to create this tuple from an alphabet string,
///   such as: `"0123456789ABCDEF"`.
///
/// Returns the encoded name as a `String`.
///
/// # Panics
///
/// Panics if the alphabet does not contain at least one character that is
/// allowed in the leading position (for example an empty alphabet, or one
/// made up solely of digits), because no name can be formed from it.
///
/// # Usage
///
/// ```
/// use encode_selector::*;
/// let new_selector = to_radix(&42, &into_alphabet_set("A1B2C3"));
/// assert_eq!(new_selector, "A22");
/// ```
pub fn to_radix(
    ordinal: &usize,
    alphabet: &(Vec<char>, Vec<usize>),
) -> String {
    let (symbols, invalid_first_positions) = alphabet;

    // Without a usable leading character the sizing loop below could never
    // terminate, so reject such alphabets up front with a clear message.
    assert!(
        symbols.len() > invalid_first_positions.len(),
        "alphabet must contain at least one character that may start a selector name"
    );

    // All intermediate arithmetic is carried out in u128 so that products such
    // as `subset * floor` cannot overflow for ordinals close to `usize::MAX`.
    // The `as` casts below only widen usize values and are therefore lossless.
    let ordinal = *ordinal as u128;
    let base = symbols.len() as u128;
    let subset = base - invalid_first_positions.len() as u128;

    // Work out the number of places the encoded ordinal will take up.
    // `carry` counts every name shorter than the current length, `floor` is
    // base ^ exponent, and `exponent` is the number of places after the first.
    let mut carry: u128 = 0;
    let mut exponent: usize = 0;
    let mut floor: u128 = 1;
    while ordinal >= subset * floor + carry {
        carry += subset * floor;
        exponent += 1;
        floor *= base;
    }

    // Rank of the leading character among the characters that are allowed to
    // lead. It ranges over the subset (0..subset), not over the whole base.
    let rank = (ordinal - carry) / floor;

    // Translate that rank into a real alphabet position by stepping over every
    // forbidden position at or before it. Relies on the positions ascending.
    let mut offset: u128 = 0;
    for &position in invalid_first_positions {
        if rank + offset < position as u128 {
            break;
        }
        offset += 1;
    }

    // Index that, once written in the supplied alphabet, starts with a
    // permitted character.
    let mut assigned_index = offset * floor + (ordinal - carry);

    // Convert the index into alphabet digits, least significant place first,
    // then reverse to obtain the usual left-to-right order.
    let mut digits: Vec<char> = Vec::with_capacity(exponent + 1);
    for _ in 0..=exponent {
        // The remainder is below `base`, i.e. below `symbols.len()`, so the
        // narrowing cast cannot truncate and the index is in bounds.
        let remainder = (assigned_index % base) as usize;
        digits.push(symbols[remainder]);
        assigned_index /= base;
    }
    digits.into_iter().rev().collect()
}
/// Turns an alphabet string into the tuple expected by the encoder.
///
/// The returned tuple holds:
///
/// - `0` — the alphabet as a vector of chars, with characters that are not
///   acceptable in CSS selector names removed and with duplicates dropped
///   (the first occurrence of each character is kept, order is preserved).
/// - `1` — the zero-based positions, within the first vector, of the
///   characters that a selector name cannot start with.
///
/// # Usage
///
/// ```
/// use encode_selector::*;
/// let alphabet = into_alphabet_set("0123456789ABCDEF");
/// ```
///
/// ## Further examples
///
/// ```
/// # use encode_selector::*;
/// assert_eq!(
///     into_alphabet_set("AaBC"),
///     (vec!['A', 'a', 'B', 'C'], vec![])
/// );
/// ```
///
/// ```
/// # use encode_selector::*;
/// assert_eq!(
///     into_alphabet_set("AABCD123B"),
///     (vec!['A', 'B', 'C', 'D', '1', '2', '3'], vec![4, 5, 6])
/// );
/// ```
pub fn into_alphabet_set(alphabet: &str) -> (Vec<char>, Vec<usize>) {
    // Drop everything that is not allowed anywhere in a selector name.
    let cleaned = INVALID_CHARACTERS.replace_all(alphabet, "");

    // Keep the first occurrence of each remaining character, in input order.
    let unique_chars = cleaned
        .chars()
        .fold(Vec::new(), |mut seen: Vec<char>, candidate| {
            if !seen.contains(&candidate) {
                seen.push(candidate);
            }
            seen
        });

    // Record where the characters sit that must never lead an encoded name.
    let leading_blacklist: Vec<usize> = unique_chars
        .iter()
        .enumerate()
        .filter(|&(_, candidate)| INVALID_FIRST_CHARACTER.contains(candidate))
        .map(|(position, _)| position)
        .collect();

    (unique_chars, leading_blacklist)
}