use crate::component::Component;
use itertools::Itertools;
use std::fmt::{Display, Formatter, Result};
/// Characters that `Grapheme::escape_regexp_symbols` prefixes with a backslash.
///
/// The backslash itself is intentionally absent from this list; it is handled
/// separately (and only when it is the entire string) in
/// `escape_regexp_symbols`.
const CHARS_TO_ESCAPE: [&str; 14] = [
    "(", ")", "[", "]", "{", "}", "+", "*", "-", ".", "?", "|", "^", "$",
];

/// Shorthand character classes. The `Display` implementation of `Grapheme`
/// only colorizes a value when it is exactly one of these strings.
const CHAR_CLASSES: [&str; 6] = ["\\d", "\\s", "\\w", "\\D", "\\S", "\\W"];

/// A piece of text together with the bounds of how often it is repeated.
///
/// The derived ordering and hashing depend on the field order below, so the
/// fields must not be reordered.
#[derive(Clone, Debug, Hash, Ord, PartialOrd, Eq, PartialEq)]
pub struct Grapheme {
    /// String pieces of this grapheme; `value()` concatenates them.
    pub(crate) chars: Vec<String>,
    /// Nested graphemes. When non-empty, `Display` renders these instead of
    /// `chars`.
    pub(crate) repetitions: Vec<Grapheme>,
    /// Lower repetition bound.
    min: u32,
    /// Upper repetition bound.
    max: u32,
    /// Selects capturing vs. non-capturing groups when `Display` has to
    /// parenthesize the value.
    is_capturing_group_enabled: bool,
    /// Forwarded to `Component::to_repr` when rendering.
    is_output_colorized: bool,
}

impl Grapheme {
    /// Creates a grapheme consisting of the single string `s` with
    /// `min == max == 1` and no nested repetitions.
    pub(crate) fn from(
        s: &str,
        is_capturing_group_enabled: bool,
        is_output_colorized: bool,
    ) -> Self {
        Self::new(
            vec![s.to_string()],
            1,
            1,
            is_capturing_group_enabled,
            is_output_colorized,
        )
    }

    /// Creates a grapheme from already split string pieces and explicit
    /// repetition bounds. The list of nested repetitions starts out empty.
    pub(crate) fn new(
        chars: Vec<String>,
        min: u32,
        max: u32,
        is_capturing_group_enabled: bool,
        is_output_colorized: bool,
    ) -> Self {
        Self {
            chars,
            repetitions: vec![],
            min,
            max,
            is_capturing_group_enabled,
            is_output_colorized,
        }
    }

    /// Returns all pieces of `chars` concatenated into one string.
    pub(crate) fn value(&self) -> String {
        self.chars.join("")
    }

    /// Read-only access to the string pieces.
    pub(crate) fn chars(&self) -> &Vec<String> {
        &self.chars
    }

    /// Mutable access to the string pieces.
    pub(crate) fn chars_mut(&mut self) -> &mut Vec<String> {
        &mut self.chars
    }

    /// Returns `true` if at least one nested grapheme is present.
    pub(crate) fn has_repetitions(&self) -> bool {
        !self.repetitions.is_empty()
    }

    /// Mutable access to the nested graphemes.
    pub(crate) fn repetitions_mut(&mut self) -> &mut Vec<Grapheme> {
        &mut self.repetitions
    }

    /// Lower repetition bound.
    pub(crate) fn minimum(&self) -> u32 {
        self.min
    }

    /// Upper repetition bound.
    pub(crate) fn maximum(&self) -> u32 {
        self.max
    }

    /// Counts the `char`s of all pieces.
    ///
    /// With `is_non_ascii_char_escaped` set, every non-ASCII character is
    /// counted with the length of its `\u{...}` escape (never as a surrogate
    /// pair) instead of as a single character.
    pub(crate) fn char_count(&self, is_non_ascii_char_escaped: bool) -> usize {
        if is_non_ascii_char_escaped {
            self.chars
                .iter()
                .flat_map(|piece| piece.chars())
                .map(|c| self.escape(c, false).chars().count())
                .sum()
        } else {
            self.chars.iter().map(|piece| piece.chars().count()).sum()
        }
    }

    /// Replaces every non-ASCII character in every piece by a `\u{...}`
    /// escape sequence.
    ///
    /// With `use_surrogate_pairs` set, code points from U+10000 up to and
    /// including U+10FFFF are written as two escapes, one per UTF-16 code
    /// unit.
    pub(crate) fn escape_non_ascii_chars(&mut self, use_surrogate_pairs: bool) {
        let escaped: Vec<String> = self
            .chars
            .iter()
            .map(|piece| {
                piece
                    .chars()
                    .map(|c| self.escape(c, use_surrogate_pairs))
                    .collect::<String>()
            })
            .collect();
        self.chars = escaped;
    }

    /// Escapes the characters listed in `CHARS_TO_ESCAPE` as well as line
    /// feed, carriage return and tab in every piece, then optionally escapes
    /// non-ASCII characters via `escape_non_ascii_chars`.
    ///
    /// A backslash is only doubled when it is the entire piece; backslashes
    /// inside longer pieces are left untouched.
    pub(crate) fn escape_regexp_symbols(
        &mut self,
        is_non_ascii_char_escaped: bool,
        is_astral_code_point_converted_to_surrogate: bool,
    ) {
        for character in self.chars.iter_mut() {
            let mut escaped = character.clone();
            for symbol in CHARS_TO_ESCAPE.iter() {
                escaped = escaped.replace(*symbol, &format!("\\{}", symbol));
            }
            escaped = escaped
                .replace('\n', "\\n")
                .replace('\r', "\\r")
                .replace('\t', "\\t");
            if escaped == "\\" {
                escaped = "\\\\".to_string();
            }
            *character = escaped;
        }
        if is_non_ascii_char_escaped {
            self.escape_non_ascii_chars(is_astral_code_point_converted_to_surrogate);
        }
    }

    /// Returns the textual form of a single character: ASCII stays as is,
    /// everything else becomes a `\u{...}` escape or, if requested and the
    /// character lies outside the Basic Multilingual Plane, a surrogate pair.
    fn escape(&self, c: char, use_surrogate_pairs: bool) -> String {
        if c.is_ascii() {
            c.to_string()
        // The range must be inclusive: U+10FFFF is a valid `char` and also
        // needs two UTF-16 code units.
        } else if use_surrogate_pairs && ('\u{10000}'..='\u{10ffff}').contains(&c) {
            self.convert_to_surrogate_pair(c)
        } else {
            c.escape_unicode().to_string()
        }
    }

    /// Encodes `c` as UTF-16 and writes each code unit as a lowercase
    /// hexadecimal `\u{...}` escape.
    fn convert_to_surrogate_pair(&self, c: char) -> String {
        let mut buffer = [0u16; 2];
        c.encode_utf16(&mut buffer)
            .iter()
            .map(|unit| format!("\\u{{{:x}}}", unit))
            .collect()
    }
}
impl Display for Grapheme {
    /// Renders the grapheme followed by its quantifier, if it has one.
    ///
    /// * The text is the concatenation of the nested repetitions when there
    ///   are any, otherwise the concatenation of `chars`.
    /// * No quantifier is written unless `min < max` (range quantifier) or
    ///   `min > 1` (exact quantifier).
    /// * When a quantifier is written and the text is not a single character
    ///   (or a single piece containing exactly one backslash), the text is
    ///   wrapped in a capturing or non-capturing group first.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
        let colorize = self.is_output_colorized;

        // A lone piece with exactly one backslash is treated like one character.
        let has_single_backslash =
            self.chars.len() == 1 && self.chars[0].matches('\\').count() == 1;
        let is_single_char = self.char_count(false) == 1 || has_single_backslash;

        let raw = if self.has_repetitions() {
            self.repetitions.iter().map(|it| it.to_string()).join("")
        } else {
            self.value()
        };
        // Colorization is only requested for the known shorthand classes.
        let is_char_class = CHAR_CLASSES.contains(&raw.as_str());
        let rendered = Component::CharClass(raw).to_repr(colorize && is_char_class);

        // Pick the quantifier; without one the text is written as is.
        let quantifier = match (self.min < self.max, self.min > 1) {
            (true, _) => Component::RepetitionRange(self.min, self.max),
            (false, true) => Component::Repetition(self.min),
            (false, false) => return write!(f, "{}", rendered),
        };

        // Multi-character text must be grouped so the quantifier applies to all of it.
        let operand = if is_single_char {
            rendered
        } else if self.is_capturing_group_enabled {
            Component::CapturedParenthesizedExpression(rendered).to_repr(colorize)
        } else {
            Component::UncapturedParenthesizedExpression(rendered).to_repr(colorize)
        };

        write!(f, "{}{}", operand, quantifier.to_repr(colorize))
    }
}