1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
//! Encodes non-negative integers as CSS-valid selector names, using the
//! characters of a caller-supplied alphabet as digits.

use lazy_static::lazy_static;
use onig::*;




lazy_static! {
	// Characters that may appear inside an encoded selector name but that
	// this crate never places in first position: the ten ASCII digits,
	// the hyphen and the underscore.
	static ref INVALID_FIRST_CHARACTER: Vec<char> = "0123456789-_".chars().collect();

	// Matches every ASCII character that is stripped from an alphabet.
	// The class is written with octal escapes:
	// - \0-\54: null up to and including comma
	// - \56 and \57: period (.) and slash (/)
	// - \72-\100: colon (:) up to and including at (@)
	// - \133-\136: left square bracket ([) up to and including caret (^)
	// - \140: backtick (`)
	// - \173-\177: left brace ({) up to and including delete
	// Hyphen (\55) and underscore (\137) are deliberately outside the class.
	static ref INVALID_CHARACTERS: Regex = Regex::new(
		r##"(?x)
			[\0-\54\56\57\72-\100\133-\136\140\173-\177]
		"##
	).unwrap();
}




/// Converts an ordinal into a selector name written in the given alphabet.
///
/// Names are numbered shortest first: every one-character name comes before
/// any two-character name, and so on. The first character of a name is only
/// ever taken from the alphabet characters that are allowed to lead, while
/// the remaining characters may be any character of the alphabet.
///
/// Function parameters:
/// - ordinal — Integer to encode.
/// - alphabet — A tuple of the alphabet characters and the (zero-based)
///   positions of the characters that must not lead a name. Note: please use
///   [`into_alphabet_set()`] to create this tuple from an alphabet string,
///   such as: `"0123456789ABCDEF"`. The positions are expected to be
///   distinct, ascending indices into the character vector, which is what
///   [`into_alphabet_set()`] produces.
///
/// # Panics
///
/// Panics when the alphabet holds no character that may lead a name (this
/// includes the empty alphabet), because no selector can be produced from it.
/// May also panic on an out-of-range index if the positions vector is not
/// made of distinct, ascending indices into the character vector.
///
/// # Usage
///
/// ```
/// use encode_selector::*;
/// let new_selector = to_radix(&42, &into_alphabet_set("A1B2C3"));
/// assert_eq!(new_selector, "A22");
/// ```
pub fn to_radix(
	ordinal: &usize,
	alphabet: &(Vec<char>, Vec<usize>),
) -> String {
	let (characters, invalid_first_positions) = alphabet;
	let base: usize = characters.len();

	// Without at least one usable leading character the place-counting loop
	// below could never make progress, so fail loudly and early instead.
	assert!(
		invalid_first_positions.len() < base,
		"alphabet must contain at least one character that can start a selector name"
	);

	// Number of characters that are allowed in first position.
	let subset: usize = base - invalid_first_positions.len();

	// Work out the number of places the encoded ordinal will take up.
	//
	// `remaining` is the ordinal minus the count of all shorter names,
	// `trailing_places` the number of characters after the first one and
	// `floor` is base ^ trailing_places (`None` once that exceeds `usize`).
	let mut remaining: usize = *ordinal;
	let mut trailing_places: usize = 0;
	let mut floor: Option<usize> = Some(1);

	// `span` is how many names exist with the current number of places.
	// If it does not fit in a `usize` it is necessarily larger than
	// `remaining`, so the overflow case simply ends the search.
	while let Some(span) = floor.and_then(|value| value.checked_mul(subset)) {
		if remaining < span {
			break;
		}
		remaining -= span;
		trailing_places += 1;
		floor = floor.and_then(|value| value.checked_mul(base));
	}

	// Split what is left into the rank of the leading character (counted
	// among the valid leading characters only) and the value carried by the
	// trailing places. An overflowed floor is larger than `remaining`.
	let (leading, mut tail) = match floor {
		Some(value) => (remaining / value, remaining % value),
		None => (0, remaining),
	};

	// Turn the rank into a real alphabet position by stepping over every
	// invalid leading position located at or before it.
	let mut first_index: usize = leading;
	for &position in invalid_first_positions {
		if first_index < position {
			break;
		}
		first_index += 1;
	}

	// Convert the tail into the supplied alphabet, least significant place
	// first, then append the leading character and flip the whole sequence.
	let mut reversed: Vec<char> = Vec::with_capacity(trailing_places + 1);
	for _ in 0..trailing_places {
		reversed.push(characters[tail % base]);
		tail /= base;
	}
	reversed.push(characters[first_index]);

	reversed.into_iter().rev().collect()
}


/// Turns an alphabet string into the tuple expected by [`to_radix()`].
///
/// The returned tuple holds:
///
/// - `0`. — the characters of the alphabet, in their original order, with
///   characters matched by the invalid-character pattern removed and
///   every repeated character reduced to its first occurrence.
/// - `1`. — the (zero-based index) positions, within the first vector, of
///   the characters that a selector name is not allowed to start with.
///
/// # Usage
///
/// ```
/// use encode_selector::*;
/// let alphabet = into_alphabet_set("0123456789ABCDEF");
/// ```
///
/// ## Further examples
///
/// ```
/// # use encode_selector::*;
/// assert_eq!(
///     into_alphabet_set("AaBC"),
///     (vec!['A', 'a', 'B', 'C'], vec![])
/// );
/// ```
///
/// ```
/// # use encode_selector::*;
/// assert_eq!(
///     into_alphabet_set("AABCD123B"),
///     (vec!['A', 'B', 'C', 'D', '1', '2', '3'], vec![4, 5, 6])
/// );
/// ```
pub fn into_alphabet_set(alphabet: &str) -> (Vec<char>, Vec<usize>) {
	// Drop everything the invalid-character pattern matches.
	let cleaned = INVALID_CHARACTERS.replace_all(alphabet, "");

	// Keep only the first occurrence of each character, preserving order.
	let unique_chars = cleaned.chars().fold(
		Vec::new(),
		|mut seen: Vec<char>, candidate| {
			if !seen.contains(&candidate) {
				seen.push(candidate);
			}
			seen
		},
	);

	// Record where the characters that must not lead a selector name
	// ended up in the deduplicated alphabet.
	let leading_blacklist: Vec<usize> = unique_chars
		.iter()
		.enumerate()
		.filter(|&(_, candidate)| INVALID_FIRST_CHARACTER.contains(candidate))
		.map(|(position, _)| position)
		.collect();

	(unique_chars, leading_blacklist)
}