use crate::unicode::push_case_variants;
/// Largest Unicode scalar value (`U+10FFFF`); the inclusive upper bound of the
/// code point space that `CharSet` ranges are measured against.
const MAX_CP: u32 = char::MAX as u32;
/// A set of code points stored as a list of inclusive `(lo, hi)` ranges.
///
/// The searching and combining methods on this type (`contains`,
/// `complement`, `intersect`) walk `ranges` in order, so they expect the list
/// to be sorted ascending with no overlapping entries. The field is public;
/// code that edits it directly is responsible for keeping that ordering.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CharSet {
    /// Inclusive `(lo, hi)` pairs of code point values.
    pub ranges: Vec<(u32, u32)>,
}
impl CharSet {
    /// Creates a set that contains no code points.
    pub fn empty() -> Self {
        CharSet { ranges: Vec::new() }
    }

    /// Creates a set covering every value from `0` through `MAX_CP`.
    pub fn full() -> Self {
        CharSet {
            ranges: vec![(0, MAX_CP)],
        }
    }

    /// Creates a set holding exactly the character `c`.
    pub fn from_char(c: char) -> Self {
        let cp = c as u32;
        CharSet {
            ranges: vec![(cp, cp)],
        }
    }

    /// Creates a set holding the inclusive range between `lo` and `hi`.
    ///
    /// The bounds may be given in either order; the smaller one becomes the
    /// start of the range.
    pub fn from_range(lo: char, hi: char) -> Self {
        let (a, b) = (lo as u32, hi as u32);
        CharSet {
            ranges: vec![(a.min(b), a.max(b))],
        }
    }

    /// Builds a set from arbitrary inclusive `(lo, hi)` pairs.
    ///
    /// The pairs are sorted, pairs with `hi < lo` are dropped, and pairs that
    /// overlap or touch are merged. Values are not checked against `MAX_CP`.
    pub fn from_ranges_unsorted(ranges: Vec<(u32, u32)>) -> Self {
        CharSet {
            ranges: Self::normalize(ranges),
        }
    }

    /// Adds the single character `c` to the set, re-merging the range list.
    pub fn add_char(&mut self, c: char) {
        self.add_range(c, c);
    }

    /// Adds the inclusive range between `lo` and `hi` (given in either order)
    /// to the set, re-merging the range list.
    pub fn add_range(&mut self, lo: char, hi: char) {
        let (a, b) = (lo as u32, hi as u32);
        self.ranges.push((a.min(b), a.max(b)));
        self.ranges = Self::normalize(std::mem::take(&mut self.ranges));
    }

    /// Sorts `ranges`, drops inverted pairs (`hi < lo`) and merges every pair
    /// that overlaps or is directly adjacent to its predecessor.
    fn normalize(mut ranges: Vec<(u32, u32)>) -> Vec<(u32, u32)> {
        ranges.sort_unstable();
        let mut merged: Vec<(u32, u32)> = Vec::with_capacity(ranges.len());
        for (lo, hi) in ranges {
            if lo > hi {
                continue;
            }
            if let Some(prev) = merged.last_mut() {
                // `saturating_add` keeps the adjacency test from wrapping when
                // the previous range already ends at `u32::MAX`.
                if lo <= prev.1.saturating_add(1) {
                    prev.1 = prev.1.max(hi);
                    continue;
                }
            }
            merged.push((lo, hi));
        }
        merged
    }

    /// Returns the set of values in `0..=MAX_CP` that are not in `self`.
    ///
    /// Expects `self.ranges` to be sorted ascending. The result never contains
    /// a value above `MAX_CP`, even if `self.ranges` does.
    pub fn complement(&self) -> CharSet {
        let mut gaps = Vec::with_capacity(self.ranges.len() + 1);
        // First value not yet known to be covered by `self`.
        let mut cursor = 0u32;
        for &(lo, hi) in &self.ranges {
            // Everything from here on lies outside the code point space.
            if cursor > MAX_CP || lo > MAX_CP {
                break;
            }
            if cursor < lo {
                gaps.push((cursor, lo - 1));
            }
            // Never move backwards: a range contained in an earlier one must
            // not re-open territory that is already covered.
            cursor = cursor.max(hi.saturating_add(1));
        }
        if cursor <= MAX_CP {
            gaps.push((cursor, MAX_CP));
        }
        CharSet { ranges: gaps }
    }

    /// Returns the set of values contained in `self`, in `other`, or in both.
    pub fn union(&self, other: &CharSet) -> CharSet {
        let mut all = Vec::with_capacity(self.ranges.len() + other.ranges.len());
        all.extend_from_slice(&self.ranges);
        all.extend_from_slice(&other.ranges);
        CharSet::from_ranges_unsorted(all)
    }

    /// Returns the set of values contained in both `self` and `other`.
    ///
    /// Both range lists are walked in lockstep, so each is expected to be
    /// sorted ascending and free of overlaps.
    pub fn intersect(&self, other: &CharSet) -> CharSet {
        let (lhs, rhs) = (&self.ranges, &other.ranges);
        let mut out = Vec::new();
        let (mut i, mut j) = (0, 0);
        while i < lhs.len() && j < rhs.len() {
            let (a_lo, a_hi) = lhs[i];
            let (b_lo, b_hi) = rhs[j];
            let lo = a_lo.max(b_lo);
            let hi = a_hi.min(b_hi);
            if lo <= hi {
                out.push((lo, hi));
            }
            // Advance whichever range ends first; the other one may still
            // overlap the next range on the opposite side.
            if a_hi < b_hi {
                i += 1;
            } else {
                j += 1;
            }
        }
        CharSet { ranges: out }
    }

    /// Returns the values of `self` that are not in `other`.
    pub fn difference(&self, other: &CharSet) -> CharSet {
        self.intersect(&other.complement())
    }

    /// Returns the values contained in exactly one of `self` and `other`.
    pub fn sym_diff(&self, other: &CharSet) -> CharSet {
        let only_self = self.difference(other);
        let only_other = other.difference(self);
        only_self.union(&only_other)
    }

    /// Reports whether `c` is a member of the set.
    ///
    /// Uses a binary search, so `self.ranges` is expected to be sorted
    /// ascending and free of overlaps.
    pub fn contains(&self, c: char) -> bool {
        use std::cmp::Ordering;

        let cp = c as u32;
        self.ranges
            .binary_search_by(|&(lo, hi)| {
                if cp < lo {
                    Ordering::Greater
                } else if cp > hi {
                    Ordering::Less
                } else {
                    Ordering::Equal
                }
            })
            .is_ok()
    }

    /// Extends the set with the characters that `push_case_variants` reports
    /// for its current members (presumably their other-case forms; that
    /// helper is defined in `crate::unicode` and not visible here).
    ///
    /// Ranges spanning at most 4096 values are scanned exhaustively, passing
    /// every alphabetic character to the helper. Wider ranges only have their
    /// two endpoints passed to it.
    ///
    /// NOTE(review): the endpoint-only shortcut can miss variants of
    /// characters in the interior of a wide range when the variant itself
    /// lies outside that range. Confirm this trade-off is intended.
    pub fn add_case_variants(&mut self) {
        const EXHAUSTIVE_SPAN_LIMIT: u32 = 4096;

        let mut extras: Vec<char> = Vec::new();
        for &(lo, hi) in &self.ranges {
            if hi.saturating_sub(lo) <= EXHAUSTIVE_SPAN_LIMIT {
                // `char::from_u32` skips values that are not scalar values.
                let letters = (lo..=hi)
                    .filter_map(char::from_u32)
                    .filter(|c| c.is_alphabetic());
                for c in letters {
                    push_case_variants(c, &mut extras);
                }
            } else {
                for cp in [lo, hi] {
                    if let Some(c) = char::from_u32(cp) {
                        push_case_variants(c, &mut extras);
                    }
                }
            }
        }

        // Nothing to add: leave `ranges` exactly as it was.
        if extras.is_empty() {
            return;
        }

        // Append every variant first and merge once, instead of re-sorting
        // the whole list after each individual character.
        self.ranges.extend(extras.into_iter().map(|c| {
            let cp = c as u32;
            (cp, cp)
        }));
        self.ranges = Self::normalize(std::mem::take(&mut self.ranges));
    }

    /// Reports whether the set has no ranges at all.
    pub fn is_empty(&self) -> bool {
        self.ranges.is_empty()
    }
}