mdurl/urlencode/
asciiset.rs

/// Represents a set of characters or bytes in the ASCII range.
///
/// The set is stored as a 128-bit mask: bit `n` is set when byte `n` is a
/// member, so only bytes in `0x00..=0x7f` can be represented.
///
/// Similar to <https://github.com/servo/rust-url/blob/master/percent_encoding/src/lib.rs>
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct AsciiSet(u128);

impl AsciiSet {
    /// Create empty ASCII-set (alphanumerical characters will still be implied by [encode](crate::urlencode::encode)).
    pub const fn new() -> Self {
        Self(0)
    }

    /// Create ASCII-set from a specific string.
    ///
    /// Every byte of `str` is added to the set; duplicates are harmless.
    ///
    /// # Panics
    ///
    /// Panics if `str` contains a byte outside `0x00..=0x7f`, i.e. any
    /// non-ASCII character (their UTF-8 encoding only uses bytes `>= 0x80`).
    pub const fn from(str: &str) -> Self {
        Self::new().add_many(str.as_bytes(), 0)
    }

    /// Add a character to the set, returning the enlarged set.
    ///
    /// The receiver is left untouched; adding an existing member is a no-op.
    ///
    /// # Panics
    ///
    /// Panics if `byte` is outside the `0x00..=0x7f` range.
    pub const fn add(&self, byte: u8) -> Self {
        Self(self.0 | Self::bit(byte))
    }

    /// Remove a character from the set, returning the reduced set.
    ///
    /// The receiver is left untouched; removing a non-member is a no-op.
    ///
    /// # Panics
    ///
    /// Panics if `byte` is outside the `0x00..=0x7f` range.
    pub const fn remove(&self, byte: u8) -> Self {
        Self(self.0 & !Self::bit(byte))
    }

    /// Return this set extended with `0-9`, `A-Z` and `a-z`.
    ///
    /// The mask covers bits 48..=57 (digits), 65..=90 (upper case) and
    /// 97..=122 (lower case).
    pub(crate) const fn add_alphanumeric(&self) -> Self {
        Self(self.0 | 0x07ff_fffe_07ff_fffe_03ff_0000_0000_0000)
    }

    /// Check if a character is in the set.
    ///
    /// # Panics
    ///
    /// Panics if `byte` is outside the `0x00..=0x7f` range.
    pub const fn has(&self, byte: u8) -> bool {
        (self.0 & Self::bit(byte)) != 0
    }

    /// Add every byte of `bytes` starting at index `idx`.
    ///
    /// Implemented as a loop rather than recursion so that the stack depth
    /// does not grow with the input length.
    const fn add_many(&self, bytes: &[u8], idx: usize) -> Self {
        let mut set = *self;
        let mut pos = idx;
        while pos < bytes.len() {
            set = set.add(bytes[pos]);
            pos += 1;
        }
        set
    }

    /// Return the single-bit mask that represents `byte`.
    ///
    /// The range check is a hard `assert!` on purpose: with `debug_assert!`
    /// a release build would skip the check and the shift amount would wrap,
    /// silently selecting the bit of an unrelated ASCII character.
    const fn bit(byte: u8) -> u128 {
        assert!(byte <= 0x7f, "AsciiSet only supports bytes in 0x00..=0x7f");
        1u128 << byte
    }
}
56
#[cfg(test)]
mod tests {
    use super::AsciiSet;

    /// Builds the alphanumeric set one byte at a time through `add`, giving
    /// an independent reference for the mask used by `add_alphanumeric`.
    fn alphanumeric_by_hand() -> AsciiSet {
        (b'a'..=b'z')
            .chain(b'A'..=b'Z')
            .chain(b'0'..=b'9')
            .fold(AsciiSet::new(), |set, byte| set.add(byte))
    }

    /// Renders the raw mask as 32 zero-padded hex digits so that a failed
    /// comparison shows which bits differ.
    fn hex(set: AsciiSet) -> String {
        format!("{:032x}", set.0)
    }

    #[test]
    fn new_should_return_ascii() {
        let set = alphanumeric_by_hand();
        let ascii = AsciiSet::new().add_alphanumeric();

        assert_eq!(hex(set), hex(ascii));
        assert!(set.has(b'x'));
        assert!(!set.has(b'!'));
    }

    #[test]
    fn from_should_return_ascii_plus() {
        let extra = "!@#$%^";
        let set = extra
            .bytes()
            .fold(alphanumeric_by_hand(), |set, byte| set.add(byte));
        let from = AsciiSet::from(extra).add_alphanumeric();

        assert_eq!(hex(set), hex(from));
        assert!(set.has(b'x'));
        assert!(set.has(b'!'));
    }

    #[test]
    #[should_panic]
    fn add_non_ascii() {
        // "β" is encoded as two bytes that are both above 0x7f.
        let _ = AsciiSet::from("β");
    }

    #[test]
    #[should_panic]
    fn add_higher_byte() {
        let _ = AsciiSet::new().add(0xfa);
    }

    #[test]
    fn add_remove() {
        assert_eq!(AsciiSet::new().add(0x20).remove(0x20), AsciiSet::new());

        // Removing one member must leave the others in place.
        let pair = AsciiSet::new().add(b'a').add(b'b');
        assert_eq!(pair.remove(b'a'), AsciiSet::new().add(b'b'));

        // Removing a byte that is not a member changes nothing.
        assert_eq!(pair.remove(b'c'), pair);
    }
}