mod lookup;
/// Character classes that can be tested against the US-ASCII lookup table.
///
/// The enum is `#[repr(u8)]` and every discriminant is a constant taken from
/// the `lookup` module. `Charset::contains*` and `LookupResult` AND that
/// discriminant with a table entry and compare the result against zero, so
/// each value acts as a bit mask.
///
/// NOTE(review): presumably every constant is one distinct bit (eight
/// variants, eight bits in a `u8`) -- confirm against the `lookup` module.
#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash, Debug)]
#[repr(u8)]
pub enum Charset {
/// Mask `lookup::QC`; re-exported as `rfc5322::QTextWs` and `rfc7230::QDText`.
QTextWs = lookup::QC,
/// Mask `lookup::CT`; re-exported as `rfc5322::CTextWs`.
CTextWs = lookup::CT,
/// Mask `lookup::DT`; re-exported as `rfc5322::DTextWs`.
DTextWs = lookup::DT,
/// Mask `lookup::AT`; re-exported as `rfc5322::AText`.
AText = lookup::AT,
/// Mask `lookup::RT`; re-exported as `rfc6838::RestrictedToken`.
RestrictedToken = lookup::RT,
/// Mask `lookup::TO`; re-exported as `rfc2045::Token`.
Token = lookup::TO,
/// Mask `lookup::NC`; re-exported as `rfc5322::ObsNoWsCtl`.
ObsNoWsCtl = lookup::NC,
/// Mask `lookup::HT`; re-exported as `rfc7230::Token`.
Rfc7230Token = lookup::HT
}
impl Charset {
    /// Returns `true` if `ch` is a US-ASCII character whose lookup-table entry
    /// has this charset's mask set.
    ///
    /// Characters outside of US-ASCII always yield `false`.
    #[inline]
    pub fn contains(&self, ch: char) -> bool {
        let non_ascii_matches = false;
        self.contains_lookup(ch, non_ascii_matches)
    }

    /// Same as [`contains`](#method.contains) for US-ASCII characters, but
    /// yields `true` for every character outside of US-ASCII.
    #[inline]
    pub fn contains_or_non_ascii(&self, ch: char) -> bool {
        let non_ascii_matches = true;
        self.contains_lookup(ch, non_ascii_matches)
    }

    /// Shared implementation of the two public queries.
    ///
    /// `out_of_table_value` is returned as-is for characters that have no
    /// entry in the table (code points `0x80` and above).
    fn contains_lookup(&self, ch: char, out_of_table_value: bool) -> bool {
        // The table only covers US-ASCII; everything else takes the fallback.
        if !ch.is_ascii() {
            return out_of_table_value;
        }
        let mask = *self as u8;
        let entry = lookup::US_ASCII_LOOKUP[ch as usize];
        (entry & mask) != 0
    }

    /// Fetches the raw lookup-table entry for `ch`.
    ///
    /// The result wraps `Some(entry)` for US-ASCII characters and `None` for
    /// all others; it can be tested against charsets through `CharMatchExt`.
    pub fn lookup(ch: char) -> LookupResult {
        let entry = if ch.is_ascii() {
            Some(lookup::US_ASCII_LOOKUP[ch as usize])
        } else {
            None
        };
        LookupResult(entry)
    }
}
// Private module holding the marker trait `Seal`, which `CharMatchExt`
// requires as a supertrait.
//
// NOTE(review): the `pub use` below re-exports `Seal` from this module, so any
// code that can reach this module can name `Seal` and implement it for its own
// types. If the intent is to keep `CharMatchExt` closed to outside
// implementations, confirm whether this re-export should stay public.
mod sealed{ pub trait Seal {} }
pub use self::sealed::Seal;
/// Extension trait for testing a value against a `Charset`.
///
/// Implementors must also be `Seal` and `Copy`. This file implements the
/// trait for `char` and for `LookupResult`.
pub trait CharMatchExt: Seal+Copy {
/// Returns whether `self` belongs to `charset`.
///
/// Both implementations in this file return `false` for values that are
/// not US-ASCII.
fn is(self, charset: Charset) -> bool;
/// Like `is`, except that both implementations in this file return `true`
/// for values that are not US-ASCII.
///
/// (`inkl` presumably abbreviates "inclusive".)
fn is_inkl_non_ascii(self, charset: Charset) -> bool;
}
impl Seal for char {}
impl CharMatchExt for char {
#[inline]
fn is(self, charset: Charset) -> bool {
charset.contains(self)
}
#[inline]
fn is_inkl_non_ascii(self, charset: Charset) -> bool {
charset.contains_or_non_ascii(self)
}
}
/// Result of `Charset::lookup`: the raw lookup-table entry for one character.
///
/// As constructed by `Charset::lookup`, the inner value is `Some(entry)` for
/// characters below `0x80` and `None` for every other character.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct LookupResult(Option<u8>);
impl LookupResult {
    /// Returns `true` if a table entry is present, i.e. the character that
    /// was looked up is covered by the US-ASCII table.
    pub fn is_ascii(&self) -> bool {
        matches!(self.0, Some(_))
    }

    /// Tests the stored table entry against the mask of `charset`.
    ///
    /// When no entry is stored, `default` is returned unchanged.
    fn lookup_contains(&self, charset: Charset, default: bool) -> bool {
        match self.0 {
            Some(entry) => (entry & (charset as u8)) != 0,
            None => default,
        }
    }
}
impl Seal for LookupResult {}

/// Lets an already fetched `LookupResult` be tested against a `Charset`
/// through the same methods that are available on `char`.
impl CharMatchExt for LookupResult {
    /// Tests the stored entry against `charset`; a result without an entry
    /// never matches.
    #[inline]
    fn is(self, charset: Charset) -> bool {
        let without_entry = false;
        self.lookup_contains(charset, without_entry)
    }

    /// Tests the stored entry against `charset`; a result without an entry
    /// always matches.
    #[inline]
    fn is_inkl_non_ascii(self, charset: Charset) -> bool {
        let without_entry = true;
        self.lookup_contains(charset, without_entry)
    }
}
/// Re-exports the `Charset` variants `QTextWs`, `CTextWs`, `AText`, `DTextWs`
/// and `ObsNoWsCtl` under an RFC-named path (presumably the character classes
/// of the RFC 5322 grammar -- confirm against the `lookup` table).
pub mod rfc5322 {
pub use super::Charset::{QTextWs, CTextWs, AText, DTextWs, ObsNoWsCtl};
}
/// Re-exports the `Charset::Token` variant under an RFC-named path
/// (presumably the `token` class of RFC 2045 -- confirm against the `lookup`
/// table).
pub mod rfc2045 {
pub use super::Charset::Token;
}
/// Re-exports the `Charset::RestrictedToken` variant under an RFC-named path
/// (presumably the restricted name characters of RFC 6838 -- confirm against
/// the `lookup` table).
pub mod rfc6838 {
pub use super::Charset::RestrictedToken;
}
/// Re-exports two `Charset` variants under different names: `QTextWs` is
/// exposed as `QDText` and `Rfc7230Token` is exposed as `Token`.
///
/// Note that `Token` in this module is a different variant than
/// `rfc2045::Token`, which re-exports `Charset::Token`.
pub mod rfc7230 {
pub use super::Charset::{
QTextWs as QDText,
Rfc7230Token as Token
};
}
/// Returns `true` if `ch` is a space (`' '`) or a horizontal tab (`'\t'`).
///
/// No other character, including other Unicode white space, is accepted.
#[inline]
pub fn is_ws(ch: char) -> bool {
    matches!(ch, ' ' | '\t')
}
/// Returns `true` if `ch` is a visible US-ASCII character, i.e. lies in the
/// range `'!'` (U+0021) to `'~'` (U+007E), both inclusive.
///
/// Space, control characters and everything outside of US-ASCII yield `false`.
#[inline]
pub fn is_vchar(ch: char) -> bool {
    // `is_ascii_graphic` covers exactly U+0021 ..= U+007E.
    ch.is_ascii_graphic()
}
// Unit tests for the charset queries (on `char` and on `LookupResult`) and
// for the `is_vchar` helper.
#[cfg(test)]
mod test {
use super::{Charset, CharMatchExt, is_vchar};
// For a US-ASCII character both query flavours must agree: `'<'` is expected
// to be part of `QTextWs` and `CTextWs` but not of `AText`.
#[test]
fn lookup_result_ascii() {
let res = Charset::lookup('<');
assert!(res.is_ascii());
assert!(res.is(Charset::QTextWs));
assert!(res.is_inkl_non_ascii(Charset::QTextWs));
assert!(res.is(Charset::CTextWs));
assert!(res.is_inkl_non_ascii(Charset::CTextWs));
assert!(!res.is(Charset::AText));
assert!(!res.is_inkl_non_ascii(Charset::AText));
}
// For a character outside of US-ASCII the result carries no table entry:
// `is` must fail while `is_inkl_non_ascii` must succeed.
#[test]
fn lookup_result_utf8() {
let res = Charset::lookup('↓');
assert!(!res.is_ascii());
assert!(!res.is(Charset::QTextWs));
assert!(res.is_inkl_non_ascii(Charset::QTextWs));
}
// Same expectations as above, but queried directly on `char`, including the
// first code point that has no table entry (U+0080).
#[test]
fn is_part_of_charset() {
assert!('<'.is(Charset::QTextWs));
assert!('<'.is_inkl_non_ascii(Charset::QTextWs));
assert!(!'<'.is(Charset::AText));
assert!(!'<'.is_inkl_non_ascii(Charset::AText));
let first_char_not_in_table = '\u{80}';
assert!(!first_char_not_in_table.is(Charset::CTextWs));
assert!(first_char_not_in_table.is_inkl_non_ascii(Charset::CTextWs));
}
// Checks both edges of the accepted range: `'!'` and `'~'` are accepted,
// their direct neighbours `' '` and U+007F are rejected.
#[test]
fn is_vchar_boundaries() {
let min = '!';
let min_m1 = ' ';
// Guard that the chosen neighbour really is directly below the minimum.
assert_eq!(min as u32 - 1, min_m1 as u32);
let max = '~';
let max_p1 = '\u{7f}';
// Guard that the chosen neighbour really is directly above the maximum.
assert_eq!(max as u32 + 1, max_p1 as u32);
assert!(is_vchar(min));
assert!(!is_vchar(min_m1));
assert!(is_vchar(max));
assert!(!is_vchar(max_p1));
}
}