use core::{
error::Error,
fmt::{self, Display, Formatter},
str,
};
/// Error returned from `AllowedAscii::try_from_unique_ascii` when the passed bytes are rejected.
#[expect(variant_size_differences, reason = "usize is fine in size")]
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum AsciiErr {
/// More than 127 bytes were passed. The contained value is the number of bytes passed.
CountTooLarge(usize),
/// A byte greater than 127 was passed. The contained value is the largest byte that was passed.
InvalidByte(u8),
/// `b'.'` (i.e., 46) was passed.
Contains46,
/// The same byte was passed more than once. The contained value is the repeated byte.
Duplicate(u8),
}
/// Human-readable messages for each [`AsciiErr`] variant.
impl Display for AsciiErr {
    /// Writes the message describing `self` to `f`.
    ///
    /// For [`AsciiErr::Duplicate`] the byte is shown as a character when it is valid UTF-8 on its own;
    /// otherwise its decimal value is shown.
    #[inline]
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        match *self {
            Self::Contains46 => f.write_str("allowed ASCII contains '.'"),
            Self::InvalidByte(invalid) => {
                write!(f, "allowed ASCII was passed the invalid byte value {invalid}")
            }
            Self::CountTooLarge(count) => {
                write!(f, "the allowed ASCII had {count} values, but 127 is the max")
            }
            // A lone byte is valid UTF-8 exactly when it is ASCII, so the fallback arm
            // only triggers for values above 127.
            Self::Duplicate(dup) => match str::from_utf8(&[dup]) {
                Ok(text) => write!(f, "allowed ASCII has the duplicate value '{text}'"),
                Err(_) => write!(f, "allowed ASCII has the invalid value '{dup}'"),
            },
        }
    }
}
// Nothing is overridden: `AsciiErr` only carries plain integers, so the default `Error` methods are used.
impl Error for AsciiErr {}
/// Wrapper around a collection of bytes, `T`, that represents a set of allowed ASCII.
///
/// Instances returned from `try_from_unique_ascii` hold their bytes sorted in ascending order with no
/// duplicates, no `b'.'`, no value above 127, and at most 127 entries; `contains` depends on that ordering.
#[derive(Debug)]
pub struct AllowedAscii<T> {
/// The wrapped bytes.
allowed: T,
}
/// Access to the wrapped collection regardless of its type.
impl<T> AllowedAscii<T> {
    /// Returns a reference to the wrapped collection.
    #[inline]
    pub const fn as_inner(&self) -> &T {
        let Self { ref allowed } = *self;
        allowed
    }
    /// Consumes `self` and returns the wrapped collection.
    #[inline]
    pub fn into_inner(self) -> T {
        let Self { allowed } = self;
        allowed
    }
}
/// Read-only queries available when the wrapped collection can be viewed as a byte slice.
impl<T: AsRef<[u8]>> AllowedAscii<T> {
    /// Returns `true` iff `val` is one of the allowed bytes.
    ///
    /// The lookup is a binary search, so it relies on the wrapped bytes being sorted in ascending order.
    #[inline]
    #[must_use]
    pub fn contains(&self, val: u8) -> bool {
        let sorted: &[u8] = self.allowed.as_ref();
        sorted.binary_search(&val).is_ok()
    }
    /// Returns the number of allowed bytes.
    #[expect(
        clippy::as_conversions,
        clippy::cast_possible_truncation,
        reason = "comment justifies its correctness"
    )]
    #[inline]
    #[must_use]
    pub fn len(&self) -> u8 {
        let count = self.allowed.as_ref().len();
        // `try_from_unique_ascii` rejects anything longer than 127 bytes, so for instances built
        // that way the count always fits in a `u8` and nothing is truncated.
        count as u8
    }
    /// Returns `true` iff there are no allowed bytes.
    #[inline]
    #[must_use]
    pub fn is_empty(&self) -> bool {
        let bytes: &[u8] = self.allowed.as_ref();
        bytes.is_empty()
    }
}
/// Construction from a mutable collection of bytes.
impl<T: AsMut<[u8]>> AllowedAscii<T> {
    /// Sorts `allowed` in place and wraps it once it is verified to be a set of unique ASCII without `b'.'`.
    ///
    /// # Errors
    ///
    /// * [`AsciiErr::CountTooLarge`] when more than 127 bytes are passed (checked before sorting).
    /// * [`AsciiErr::InvalidByte`] when the largest byte exceeds 127.
    /// * [`AsciiErr::Contains46`] when `b'.'` is encountered.
    /// * [`AsciiErr::Duplicate`] when a byte appears more than once.
    ///
    /// The bytes are scanned in ascending order and the first offending byte decides which of the last
    /// two errors is returned.
    #[inline]
    pub fn try_from_unique_ascii(mut allowed: T) -> Result<Self, AsciiErr> {
        let set = allowed.as_mut();
        let count = set.len();
        if count > 127 {
            return Err(AsciiErr::CountTooLarge(count));
        }
        set.sort_unstable();
        // Once sorted, the last byte is the largest, so it alone decides whether
        // every byte is ASCII.
        match set.last() {
            Some(&max) if max > 127 => return Err(AsciiErr::InvalidByte(max)),
            Some(_) | None => {}
        }
        // 255 is not ASCII, so it can never compare equal to the first byte.
        let mut prev = 255;
        for &byt in set.iter() {
            if byt == b'.' {
                return Err(AsciiErr::Contains46);
            }
            // Equal bytes are adjacent after sorting.
            if byt == prev {
                return Err(AsciiErr::Duplicate(byt));
            }
            prev = byt;
        }
        Ok(Self { allowed })
    }
}
/// Every printable, non-space ASCII byte (33 through 126) except `b'.'` and `b'\\'`, in ascending order.
pub const PRINTABLE_ASCII: AllowedAscii<[u8; 92]> = AllowedAscii {
    allowed: *b"!\"#$%&'()*+,-/0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~",
};
/// ASCII letters, digits, and ``!#$%&'*+-/=?^_`{|}~`` in ascending order; this is the `atext` set of
/// RFC 5322 § 3.2.3.
pub const RFC5322_ATEXT: AllowedAscii<[u8; 81]> = AllowedAscii {
    allowed: *b"!#$%&'*+-/0123456789=?ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz{|}~",
};
/// ASCII letters, digits, and ``!$&'()+,-;=_`{}~`` in ascending order.
///
/// NOTE(review): going by the name, presumably the ASCII Firefox accepts in a domain; confirm against
/// the upstream documentation.
pub const ASCII_FIREFOX: AllowedAscii<[u8; 78]> = AllowedAscii {
    allowed: *b"!$&'()+,-0123456789;=ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz{}~",
};
/// `b'-'`, ASCII digits, and ASCII letters of both cases, in ascending order.
pub const ASCII_HYPHEN_DIGITS_LETTERS: AllowedAscii<[u8; 63]> = AllowedAscii {
    allowed: *b"-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
};
/// ASCII digits and ASCII letters of both cases, in ascending order.
pub const ASCII_DIGITS_LETTERS: AllowedAscii<[u8; 62]> = AllowedAscii {
    allowed: *b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
};
/// ASCII letters of both cases, uppercase first, in ascending order.
pub const ASCII_LETTERS: AllowedAscii<[u8; 52]> = AllowedAscii {
    allowed: *b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
};
/// `b'-'`, ASCII digits, and uppercase ASCII letters, in ascending order.
pub const ASCII_HYPHEN_DIGITS_UPPERCASE: AllowedAscii<[u8; 37]> = AllowedAscii {
    allowed: *b"-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ",
};
/// `b'-'`, ASCII digits, and lowercase ASCII letters, in ascending order.
pub const ASCII_HYPHEN_DIGITS_LOWERCASE: AllowedAscii<[u8; 37]> = AllowedAscii {
    allowed: *b"-0123456789abcdefghijklmnopqrstuvwxyz",
};
/// ASCII digits and uppercase ASCII letters, in ascending order.
pub const ASCII_DIGITS_UPPERCASE: AllowedAscii<[u8; 36]> = AllowedAscii {
    allowed: *b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ",
};
/// ASCII digits and lowercase ASCII letters, in ascending order.
pub const ASCII_DIGITS_LOWERCASE: AllowedAscii<[u8; 36]> = AllowedAscii {
    allowed: *b"0123456789abcdefghijklmnopqrstuvwxyz",
};
/// Uppercase ASCII letters, in ascending order.
pub const ASCII_UPPERCASE: AllowedAscii<[u8; 26]> = AllowedAscii {
    allowed: *b"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
};
/// Lowercase ASCII letters, in ascending order.
pub const ASCII_LOWERCASE: AllowedAscii<[u8; 26]> = AllowedAscii {
    allowed: *b"abcdefghijklmnopqrstuvwxyz",
};
/// ASCII digits, in ascending order.
pub const ASCII_DIGITS: AllowedAscii<[u8; 10]> = AllowedAscii {
    allowed: *b"0123456789",
};
/// ASCII letters, digits, and ``!"$&'()*+,-;=_`{}~`` in ascending order.
///
/// Put differently: all ASCII except the control bytes 0 through 31, space, `#`, `%`, `/`, `:`, `<`, `>`,
/// `?`, `@`, `[`, `\`, `]`, `^`, `|`, 127, and `.`. Apart from `.`, those exclusions match the forbidden
/// domain code points of the WHATWG URL Standard.
pub const WHATWG_VALID_DOMAIN_CODE_POINTS: AllowedAscii<[u8; 80]> = AllowedAscii {
    allowed: *b"!\"$&'()*+,-0123456789;=ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz{}~",
};
/// Unit tests for the constructor and for the contents of the predefined sets.
#[cfg(test)]
mod tests {
    extern crate alloc;
    use crate::char_set::{
        ASCII_DIGITS, ASCII_DIGITS_LETTERS, ASCII_DIGITS_LOWERCASE, ASCII_DIGITS_UPPERCASE,
        ASCII_FIREFOX, ASCII_HYPHEN_DIGITS_LETTERS, ASCII_HYPHEN_DIGITS_LOWERCASE,
        ASCII_HYPHEN_DIGITS_UPPERCASE, ASCII_LETTERS, ASCII_LOWERCASE, ASCII_UPPERCASE,
        AllowedAscii, AsciiErr, PRINTABLE_ASCII, RFC5322_ATEXT, WHATWG_VALID_DOMAIN_CODE_POINTS,
    };
    use alloc::{borrow::ToOwned, vec::Vec};
    /// Returns `true` iff every byte yielded by `bytes` is contained in `set`.
    fn has_all<T, I>(set: &AllowedAscii<T>, bytes: I) -> bool
    where
        T: AsRef<[u8]>,
        I: IntoIterator<Item = u8>,
    {
        bytes.into_iter().all(|byt| set.contains(byt))
    }
    /// Returns `true` iff no byte yielded by `bytes` is contained in `set`.
    fn has_none<T, I>(set: &AllowedAscii<T>, bytes: I) -> bool
    where
        T: AsRef<[u8]>,
        I: IntoIterator<Item = u8>,
    {
        !bytes.into_iter().any(|byt| set.contains(byt))
    }
    /// Exercises every error path of `try_from_unique_ascii` as well as the success path.
    #[test]
    fn try_from() {
        assert!(AllowedAscii::try_from_unique_ascii([]).is_ok());
        assert!(matches!(
            AllowedAscii::try_from_unique_ascii(b"aba".to_owned()),
            Err(AsciiErr::Duplicate(b'a'))
        ));
        assert!(matches!(
            AllowedAscii::try_from_unique_ascii(b"a.c".to_owned()),
            Err(AsciiErr::Contains46)
        ));
        assert!(matches!(
            AllowedAscii::try_from_unique_ascii([0; 128]),
            Err(AsciiErr::CountTooLarge(128))
        ));
        // All 127 ASCII bytes other than `b'.'`: the largest input that is accepted.
        let all_ascii = (0..=127)
            .filter(|byt| *byt != b'.')
            .collect::<Vec<u8>>();
        assert!(AllowedAscii::try_from_unique_ascii(all_ascii).is_ok());
        assert!(matches!(
            AllowedAscii::try_from_unique_ascii([255]),
            Err(AsciiErr::InvalidByte(255))
        ));
        assert!(
            AllowedAscii::try_from_unique_ascii(b"abcdef".to_owned())
                .is_ok_and(|set| has_all(&set, b'a'..=b'f'))
        );
    }
    /// Verifies the length and membership of the predefined sets.
    #[test]
    fn test_consts() {
        assert_eq!(ASCII_LETTERS.len(), 52);
        assert!(has_all(&ASCII_LETTERS, b'A'..=b'Z'));
        assert!(has_all(&ASCII_LETTERS, b'a'..=b'z'));

        assert_eq!(ASCII_DIGITS.len(), 10);
        assert!(has_all(&ASCII_DIGITS, b'0'..=b'9'));

        assert_eq!(ASCII_LOWERCASE.len(), 26);
        assert!(has_all(&ASCII_LOWERCASE, b'a'..=b'z'));

        assert_eq!(ASCII_UPPERCASE.len(), 26);
        assert!(has_all(&ASCII_UPPERCASE, b'A'..=b'Z'));

        assert_eq!(ASCII_DIGITS_LETTERS.len(), 62);
        assert!(has_all(&ASCII_DIGITS_LETTERS, b'a'..=b'z'));
        assert!(has_all(&ASCII_DIGITS_LETTERS, b'0'..=b'9'));
        assert!(has_all(&ASCII_DIGITS_LETTERS, b'A'..=b'Z'));

        assert_eq!(ASCII_DIGITS_LOWERCASE.len(), 36);
        assert!(has_all(&ASCII_DIGITS_LOWERCASE, b'a'..=b'z'));
        assert!(has_all(&ASCII_DIGITS_LOWERCASE, b'0'..=b'9'));

        assert_eq!(ASCII_DIGITS_UPPERCASE.len(), 36);
        assert!(has_all(&ASCII_DIGITS_UPPERCASE, b'A'..=b'Z'));
        assert!(has_all(&ASCII_DIGITS_UPPERCASE, b'0'..=b'9'));

        assert_eq!(ASCII_FIREFOX.len(), 78);
        assert!(has_all(&ASCII_FIREFOX, b'A'..=b'Z'));
        assert!(has_all(&ASCII_FIREFOX, b'a'..=b'z'));
        assert!(has_all(&ASCII_FIREFOX, b'0'..=b'9'));
        assert!(has_all(
            &ASCII_FIREFOX,
            b"!$&'()+,-;=_`{}~".iter().copied()
        ));

        assert_eq!(ASCII_HYPHEN_DIGITS_LETTERS.len(), 63);
        assert!(ASCII_HYPHEN_DIGITS_LETTERS.contains(b'-'));
        assert!(has_all(&ASCII_HYPHEN_DIGITS_LETTERS, b'A'..=b'Z'));
        assert!(has_all(&ASCII_HYPHEN_DIGITS_LETTERS, b'a'..=b'z'));
        assert!(has_all(&ASCII_HYPHEN_DIGITS_LETTERS, b'0'..=b'9'));

        assert_eq!(ASCII_HYPHEN_DIGITS_LOWERCASE.len(), 37);
        assert!(ASCII_HYPHEN_DIGITS_LOWERCASE.contains(b'-'));
        assert!(has_all(&ASCII_HYPHEN_DIGITS_LOWERCASE, b'a'..=b'z'));
        assert!(has_all(&ASCII_HYPHEN_DIGITS_LOWERCASE, b'0'..=b'9'));

        assert_eq!(ASCII_HYPHEN_DIGITS_UPPERCASE.len(), 37);
        assert!(ASCII_HYPHEN_DIGITS_UPPERCASE.contains(b'-'));
        assert!(has_all(&ASCII_HYPHEN_DIGITS_UPPERCASE, b'A'..=b'Z'));
        assert!(has_all(&ASCII_HYPHEN_DIGITS_UPPERCASE, b'0'..=b'9'));

        // Every byte from `b'!'` through `b'~'` other than `b'.'` and `b'\\'`.
        assert_eq!(PRINTABLE_ASCII.len(), 92);
        assert!(has_all(
            &PRINTABLE_ASCII,
            (33..=b'~').filter(|byt| *byt != b'.' && *byt != b'\\')
        ));

        assert_eq!(RFC5322_ATEXT.len(), 81);
        assert!(has_all(&RFC5322_ATEXT, b'A'..=b'Z'));
        assert!(has_all(&RFC5322_ATEXT, b'a'..=b'z'));
        assert!(has_all(&RFC5322_ATEXT, b'0'..=b'9'));
        assert!(has_all(
            &RFC5322_ATEXT,
            b"!#$%&'*+-/=?^_`{|}~".iter().copied()
        ));

        // Only exclusions are checked here; together with the length they pin down the set.
        assert!(has_none(&WHATWG_VALID_DOMAIN_CODE_POINTS, 0..=0x1f));
        assert!(has_none(
            &WHATWG_VALID_DOMAIN_CODE_POINTS,
            b"\x20#/:<>?@[\\]^|%\x7f.".iter().copied()
        ));
        // 128 ASCII bytes, minus 32 control bytes, minus the 16 bytes excluded above.
        assert_eq!(WHATWG_VALID_DOMAIN_CODE_POINTS.len(), 128 - 32 - 16);
    }
}