regex-charclass 1.0.3

Manipulate and convert regex character classes.
Documentation
use irange::RangeSet;
use once_cell::sync::Lazy;
use unicode::{general_category, perl_decimal, perl_space, perl_word, property_bool, script};

use crate::{Char, CharacterClass};

mod unicode;

type ClassesCollection = Vec<(usize, &'static [(char, char)], &'static str)>;

static CLASSES_COLLECTION: Lazy<ClassesCollection> = Lazy::new(|| {
    let mut collection = Vec::with_capacity(
        general_category::BY_NAME.len() + property_bool::BY_NAME.len() + script::BY_NAME.len(),
    );

    for (name, value) in general_category::BY_NAME {
        collection.push((value.len(), *value, *name));
    }

    for (name, value) in property_bool::BY_NAME {
        collection.push((value.len(), *value, *name));
    }

    for (name, value) in script::BY_NAME {
        collection.push((value.len(), *value, *name));
    }

    collection.sort_unstable_by(|a, b| a.0.cmp(&b.0).then_with(|| a.1.cmp(b.1)));
    collection
});

pub(super) fn identify_class(this: &RangeSet<Char>) -> Option<String> {
    if this.get_cardinality() == 1 {
        if let Some(character) = identify_character(this.iter().next()?.to_char()) {
            return Some(character.to_owned());
        }
    }

    let char = convert_to_range(this);
    if let Some(perl_class) = get_perl_class(&char) {
        return Some(perl_class.to_owned());
    }
    if let Some(class) = find_class(char.as_slice()) {
        return Some(format!("\\p{{{}}}", class));
    }

    let this = this.complement();
    let char = convert_to_range(&this);
    if let Some(perl_class) = get_perl_class(&char) {
        return Some(perl_class.to_uppercase());
    }
    if let Some(class) = find_class(char.as_slice()) {
        return Some(format!("\\P{{{}}}", class));
    }

    None
}

#[inline]
fn find_class(ranges: &[(char, char)]) -> Option<&'static str> {
    CLASSES_COLLECTION
        .binary_search_by(|(len, ranges_cmp, _)| {
            len.cmp(&ranges.len()).then_with(|| ranges_cmp.cmp(&ranges))
        })
        .ok()
        .map(|index| CLASSES_COLLECTION[index].2)
}

#[inline]
pub(super) fn identify_character(this: char) -> Option<&'static str> {
    if this == '\n' {
        Some("\\n")
    } else if this == '\r' {
        Some("\\r")
    } else if this == '\t' {
        Some("\\t")
    } else if this == '\u{B}' {
        Some("\\v")
    } else {
        None
    }
}

#[inline]
fn convert_to_range(range_set: &RangeSet<Char>) -> Vec<(char, char)> {
    range_set
        .0
        .chunks_exact(2)
        .map(|chunk| (chunk[0].to_char(), chunk[1].to_char()))
        .collect()
}

#[inline]
fn get_perl_class(range: &[(char, char)]) -> Option<&'static str> {
    if is_perl_decimal(range) {
        Some("\\d")
    } else if is_perl_space(range) {
        Some("\\s")
    } else if is_perl_word(range) {
        Some("\\w")
    } else {
        None
    }
}

#[inline]
fn is_perl_word(range: &[(char, char)]) -> bool {
    perl_word::PERL_WORD == range
}

#[inline]
fn is_perl_space(range: &[(char, char)]) -> bool {
    perl_space::WHITE_SPACE == range
}

#[inline]
fn is_perl_decimal(range: &[(char, char)]) -> bool {
    perl_decimal::DECIMAL_NUMBER == range
}