tiny_clean/common.rs
/// Width, in bits, of one hexadecimal digit; used as the per-digit shift amount.
pub(crate) const HEX_SHIFT: u32 = 4;
/// Mask that keeps only the lowest hexadecimal digit (the low four bits) of a value.
pub(crate) const HEX_MASK: u32 = (1 << HEX_SHIFT) - 1;
3
/// Lowercase hexadecimal digits, indexed by the digit's numeric value (`0..=15`).
pub(crate) const HEX: [char; 16] = [
    '0', '1', '2', '3', '4', '5', '6', '7', //
    '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
];

/// Uppercase hexadecimal digits, indexed by the digit's numeric value (`0..=15`).
pub(crate) const U_HEX: [char; 16] = [
    '0', '1', '2', '3', '4', '5', '6', '7', //
    '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
];
10
/// Returns the single-bit mask that represents `c` inside its 32-bit bucket.
///
/// # Explanation
/// - The Unicode scalar value of `c` is reduced to its five lowest bits, giving a
///   bit position in the range `[0, 31]`.
/// - The result is a `u32` with exactly that one bit set.
///
/// Characters whose scalar values differ by a multiple of 32 therefore share the
/// same mask; `char_bucket` tells them apart.
#[inline]
pub(crate) const fn char_mask(c: char) -> u32 {
    // Keeps the low five bits, i.e. the position of `c` within a 32-bit word.
    const BIT_INDEX_MASK: u32 = u32::BITS - 1;

    let bit_index = (c as u32) & BIT_INDEX_MASK;
    1u32 << bit_index
}
24
/// Returns the index of the 32-character "bucket" that contains `c`.
///
/// # Explanation
/// - The Unicode scalar value of `c` is divided by 32 (the number of bits in a
///   `u32`), discarding the remainder.
/// - The quotient is returned as a `usize` so it can be used directly as an index.
///
/// The discarded remainder is the bit position that `char_mask` turns into a mask.
#[inline]
pub(crate) const fn char_bucket(c: char) -> usize {
    let scalar = c as u32;
    (scalar / u32::BITS) as usize
}
35
36/// Encodes a single character as a hexadecimal escape sequence and appends it to the output string.
37///
38/// # Parameters
39/// - `escape_char: char`: The prefix character used to denote the beginning of an escape sequence.
40/// Commonly, this would be a backslash (`'\\'`).
41/// - `output: &mut String`: A mutable reference to the output string where the encoded hex sequence
42/// will be appended.
43/// - `character: char`: The character to be encoded as a hexadecimal byte.
44///
45/// # Behavior
46/// - The function reserves enough space in the output string to accommodate the escape sequence (`4` characters).
47/// - The escape sequence format is `"{escape_char}xHH"`, where `HH` represents the two-digit hexadecimal
48/// value of the input character.
49/// - This function uses the constants `HEX`, `HEX_SHIFT`, and `HEX_MASK` to efficiently extract and format
50/// the hexadecimal digits.
51#[inline]
52pub(crate) fn encode_as_hex_byte(escape_char: char, output: &mut String, character: char) {
53 output.push(escape_char);
54 output.push('x');
55 output.push(HEX[(character as u32 >> HEX_SHIFT) as usize]);
56 output.push(HEX[(character as u32 & HEX_MASK) as usize]);
57}
58
59/// Encodes a single character as a Unicode escape sequence and appends it to the output string.
60///
61/// # Parameters
62/// - `escape_char: char`: The prefix character used to indicate the beginning of the escape sequence.
63/// Commonly, this would be a backslash (`'\\'`).
64/// - `output: &mut String`: A mutable reference to the output string where the Unicode escape sequence
65/// will be appended.
66/// - `character: char`: The character to be encoded as a Unicode escape sequence.
67///
68/// # Behavior
69/// - The function reserves enough space in the output string to accommodate the escape sequence (`6` characters).
70/// - The escape sequence format is `"{escape_char}uHHHH"`, where `HHHH` represents the four-digit
71/// hexadecimal Unicode code point of the input character.
72/// - Hexadecimal digits are efficiently calculated and appended to the output using bitwise operations
73/// and the `HEX` lookup table.
74#[inline]
75pub(crate) fn encode_as_unicode(escape_char: char, output: &mut String, character: char) {
76 output.push(escape_char);
77 output.push('u');
78 output.push(HEX[(character as u32 >> (3 * HEX_SHIFT)) as usize & HEX_MASK as usize]);
79 output.push(HEX[(character as u32 >> (2 * HEX_SHIFT)) as usize & HEX_MASK as usize]);
80 output.push(HEX[(character as u32 >> (1 * HEX_SHIFT)) as usize & HEX_MASK as usize]);
81 output.push(HEX[(character as u32 & HEX_MASK) as usize]);
82}
83
84pub(crate) fn dump_masks_to_ascii(masks: &[u32; 4]) {
85 println!("Dumping Mask Values (0-127)");
86 for char in '\u{0000}'..='\u{007F}' {
87 let bucket = char_bucket(char);
88 let mask = char_mask(char);
89 if (masks[bucket] & mask) != 0 {
90 println!("{char:?} {}: VALID", char as u32);
91 } else {
92 println!("{char:?} {}: INVALID", char as u32);
93 }
94 }
95}