type Range = std::ops::RangeInclusive<char>;
#[derive(Clone, Hash, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(transparent))]
pub struct CharClass {
ranges: Vec<Range>,
}
impl CharClass {
pub fn new() -> Self {
Self { ranges: Vec::new() }
}
pub fn new_with_predicate<F>(predicate: F) -> Self
where
F: Fn(&char) -> bool,
{
let mut ranges = Vec::new();
let mut start = None;
let mut end = char::MIN;
for ch in char::MIN..=char::MAX {
if predicate(&ch) {
if start.is_none() {
start = Some(ch);
}
end = ch;
} else if let Some(start_ch) = start {
ranges.push(start_ch..=end);
start = None;
}
}
Self { ranges } }
pub fn len(&self) -> u32 {
self.ranges
.iter()
.map(|r| *r.end() as u32 - *r.start() as u32 + 1)
.sum()
}
pub fn normalize(&mut self) {
let mut prev_count: usize = 0;
while self.ranges.len() != prev_count {
prev_count = self.ranges.len();
self.ranges.sort_by(|a, b| a.start().cmp(b.start()));
for i in 0..self.ranges.len() - 1 {
let a = &self.ranges[i];
let b = &self.ranges[i + 1];
if b.start() <= a.end() && b.end() >= a.start() {
if b.end() > a.end() {
self.ranges[i] = *a.start()..=*b.end();
}
self.ranges.remove(i + 1);
break;
}
else if *a.end() as u32 + 1 == *b.start() as u32 {
self.ranges[i] = *a.start()..=*b.end();
self.ranges.remove(i + 1);
break;
}
}
}
}
pub fn negate(mut self) -> CharClass {
let mut prev_count: usize = 0;
let mut start = '\0';
let mut end = '\0';
while self.ranges.len() != prev_count {
prev_count = self.ranges.len();
for i in 0..self.ranges.len() {
let irange = self.ranges[i].clone();
if end < *irange.start() {
end = if *irange.start() > '\0' {
std::char::from_u32(*irange.start() as u32 - 1).unwrap()
} else {
'\0'
};
self.ranges[i] = start..=end;
start = if *irange.end() < std::char::MAX {
std::char::from_u32(*irange.end() as u32 + 1).unwrap()
} else {
std::char::MAX
};
end = start;
} else {
end = if *irange.end() < std::char::MAX {
std::char::from_u32(*irange.end() as u32 + 1).unwrap()
} else {
std::char::MAX
};
self.ranges.remove(i);
break;
}
}
}
if end < std::char::MAX {
self.ranges.push(end..=std::char::MAX);
}
self.normalize();
self
}
pub fn add(&mut self, range: Range) -> u32 {
let len = self.len();
self.ranges.push(range);
self.normalize();
self.len() - len
}
pub fn clear(&mut self) {
self.ranges.clear();
}
pub fn test(&self, range: &Range) -> bool {
self.ranges
.binary_search_by(|r| {
if r.start() > range.end() {
std::cmp::Ordering::Greater
} else if r.end() < range.start() {
std::cmp::Ordering::Less
} else {
if range.start() >= r.start() && range.end() <= r.end() {
std::cmp::Ordering::Equal
} else {
std::cmp::Ordering::Less }
}
})
.is_ok()
}
fn is_any(&self) -> bool {
self.ranges.len() == 1
&& *self.ranges[0].start() == 0 as char
&& *self.ranges[0].end() == std::char::MAX
}
}
impl std::fmt::Debug for CharClass {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
fn escape(ch: char) -> String {
match ch {
'\x07' => "\\a".to_string(),
'\x08' => "\\b".to_string(),
'\x0c' => "\\f".to_string(),
'\n' => "\\n".to_string(),
'\r' => "\\r".to_string(),
'\t' => "\\t".to_string(),
'\x0b' => "\\v".to_string(),
'\\' => "\\\\".to_string(),
_ => format!("{}", ch),
}
}
if self.is_any() {
write!(f, ".")?;
} else {
write!(f, "[")?;
for range in &self.ranges {
if range.start() < range.end() {
write!(f, "{}-{}", escape(*range.start()), escape(*range.end()))?;
} else {
write!(f, "{}", escape(*range.start()))?;
}
}
write!(f, "]")?;
}
Ok(())
}
}
impl PartialOrd for CharClass {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
if self.ranges.len() == other.ranges.len() {
for (mine, other) in self.ranges.iter().zip(other.ranges.iter()) {
if other.end() > mine.end() || other.start() > mine.start() {
return Some(std::cmp::Ordering::Less);
} else if other.end() < mine.end() {
return Some(std::cmp::Ordering::Greater);
}
}
Some(std::cmp::Ordering::Equal)
} else {
None
}
}
}
impl std::ops::Add for CharClass {
type Output = Self;
fn add(mut self, other: Self) -> Self {
for range in &other.ranges {
self.ranges.push(range.clone());
}
self.normalize();
self
}
}
impl std::ops::AddAssign for CharClass {
fn add_assign(&mut self, other: Self) {
for range in &other.ranges {
self.ranges.push(range.clone());
}
self.normalize();
}
}
#[macro_export]
macro_rules! charclass {
( $( $from:expr => $to:expr ),+ ) => {
{
let mut ccl = $crate::CharClass::new();
$( ccl.add($from..=$to); )*
ccl
}
};
( $( $chr:expr ),+ ) => {
{
let mut ccl = $crate::CharClass::new();
$( ccl.add($chr..=$chr); )*
ccl
}
};
}
#[test]
fn playground() {
let mut ccl = CharClass::new();
ccl.add('a'..='c');
ccl.add('€'..='€');
ccl.add('k'..='v');
ccl.normalize();
println!("{:?}", ccl);
ccl.normalize();
println!("{:?}", ccl);
for c in b'a'..=b'z' {
let c = char::from(c);
println!("{}: {}", c, ccl.test(&(c..=c)));
}
for rg in vec!['k'..='v', 'l'..='o', 'a'..='d', 'a'..='b', 'k'..='x'] {
println!("{:?} {}", &rg, ccl.test(&rg));
}
let mut t = CharClass::new();
t.add('A'..='D');
ccl += t;
println!("{:?}", ccl);
}
#[test]
fn uppercase_test() {
let ccl = CharClass::new_with_predicate(|ch| char::is_uppercase(*ch));
println!("{:?}", ccl);
println!("{:?}", ccl.len());
println!("{:?}", ccl.ranges.len());
assert_eq!(ccl.test(&('Ä'..='Ä')), true);
assert_eq!(ccl.test(&('ö'..='ö')), false);
}
#[test]
fn ascii_test() {
let ccl = CharClass::new_with_predicate(char::is_ascii_uppercase);
println!("{:?}", ccl);
println!("{:?}", ccl.len());
println!("{:?}", ccl.ranges.len());
assert_eq!(ccl.test(&('A'..='C')), true);
assert_eq!(ccl.test(&('a'..='c')), false);
}