use std::borrow::Cow;
/// ASCII lowercasing that reports its result as a [`Cow`], so an implementation
/// can hand back the input untouched instead of always allocating.
pub trait ToAsciiLowercaseCow {
    /// Returns the ASCII-lowercase form of `self`.
    ///
    /// The returned [`Cow`] borrows from `self`; whether it is `Borrowed` or
    /// `Owned` is decided by the implementation.
    fn to_ascii_lowercase_cow(&self) -> Cow<'_, str>;
}
impl ToAsciiLowercaseCow for str {
    /// Lowercases the ASCII letters of `self`, allocating only when needed.
    ///
    /// Returns [`Cow::Borrowed`] when the string holds no ASCII uppercase
    /// letter, otherwise a [`Cow::Owned`] copy in which every ASCII uppercase
    /// letter has been lowercased.
    ///
    /// # Panics
    ///
    /// In builds with debug assertions enabled, panics if `self` is not pure
    /// ASCII.
    fn to_ascii_lowercase_cow(&self) -> Cow<'_, str> {
        debug_assert!(self.is_ascii());

        match self.bytes().position(|byte| byte.is_ascii_uppercase()) {
            // Nothing to change: hand the input back without allocating.
            None => Cow::Borrowed(self),
            Some(first_upper) => {
                let mut lowered = self.to_owned();
                // `first_upper` is the offset of an ASCII byte, hence a valid
                // char boundary. Everything before it is already lowercase,
                // so only the tail needs rewriting.
                lowered[first_upper..].make_ascii_lowercase();
                Cow::Owned(lowered)
            }
        }
    }
}
impl ToAsciiLowercaseCow for String {
#[inline(always)]
fn to_ascii_lowercase_cow(&self) -> Cow<str> {
self.as_str().to_ascii_lowercase_cow()
}
}
/// Per-character state carried between loop iterations of `normalize_string`.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum CharSignal {
    /// Nothing is pending. This is the initial state, and the state restored
    /// after an ordinary character has been emitted.
    None,
    /// Set after a backslash that is followed by one of the escape characters
    /// `normalize_string` leaves as written; while it is set, a following
    /// backslash is emitted verbatim.
    Keep,
    /// The output for the upcoming input character has already been written
    /// (for example when `\r\n` is collapsed to `\n`); the payload is the
    /// character that `normalize_string` skips when it comes next in the input.
    AlreadyPrinted(char),
}
/// The two quote characters a string can be written with.
#[derive(Debug, Eq, PartialEq, Clone, Copy)]
pub enum Quote {
    /// The double quote, `"`.
    Double,
    /// The single quote, `'`.
    Single,
}

impl Quote {
    /// Returns the quote as a `char`.
    pub fn as_char(&self) -> char {
        match self {
            Quote::Double => '"',
            Quote::Single => '\'',
        }
    }

    /// Returns the quote as a one-character string.
    ///
    /// The result is a literal, so it is not tied to the lifetime of `self`.
    pub fn as_string(&self) -> &'static str {
        match self {
            Quote::Double => "\"",
            Quote::Single => "'",
        }
    }

    /// Returns the quote preceded by a backslash (`\"` or `\'`).
    ///
    /// The result is a literal, so it is not tied to the lifetime of `self`.
    pub fn as_escaped(&self) -> &'static str {
        match self {
            Quote::Double => "\\\"",
            Quote::Single => "\\'",
        }
    }

    /// Returns the quote as a single ASCII byte (despite the plural name).
    pub fn as_bytes(&self) -> u8 {
        match self {
            Quote::Double => b'"',
            Quote::Single => b'\'',
        }
    }

    /// Returns the opposite quote: `Single` for `Double` and vice versa.
    pub fn other(&self) -> Self {
        match self {
            Quote::Double => Quote::Single,
            Quote::Single => Quote::Double,
        }
    }
}
/// Rewrites the content of a string literal (without its enclosing quotes) so
/// that it can be enclosed in `preferred_quote`.
///
/// The rewrite performs these steps:
///
/// * an unescaped `preferred_quote` gains a backslash;
/// * an escaped alternate quote (the [`Quote::other`] of `preferred_quote`)
///   loses its backslash;
/// * a backslash followed by one of the characters in `KEPT_ESCAPES` is kept
///   together with that character;
/// * a backslash followed by any other character is dropped, unless
///   `is_escape_preserved` is `true`, in which case it is kept;
/// * a trailing lone backslash is kept;
/// * `\r\n` is collapsed to `\n`.
///
/// Content without any backslash or quote character is returned as-is through
/// an early exit; in particular, `\r\n` is *not* collapsed in that case.
///
/// # Returns
///
/// [`Cow::Borrowed`] when the result is identical to `raw_content`, otherwise
/// [`Cow::Owned`] with the rewritten text.
pub fn normalize_string(
    raw_content: &str,
    preferred_quote: Quote,
    is_escape_preserved: bool,
) -> Cow<'_, str> {
    // Characters that, directly after a backslash, form an escape sequence
    // that is emitted exactly as written.
    const KEPT_ESCAPES: &str = "^\n\r\"'01234567\\bfnrtuvx\u{2028}\u{2029}";

    let preferred = preferred_quote.as_char();
    let alternate = preferred_quote.other().as_char();

    // Only content with a backslash or a quote is ever rewritten.
    if !raw_content.contains(['\\', preferred, alternate]) {
        return Cow::Borrowed(raw_content);
    }

    let mut reduced_string = String::with_capacity(raw_content.len());
    let mut signal = CharSignal::None;
    let mut chars = raw_content.chars().peekable();

    while let Some(current_char) = chars.next() {
        // `AlreadyPrinted` is a one-shot marker: it suppresses exactly the
        // next character and is cleared right away, so that later occurrences
        // of the same character are not swallowed as well.
        if let CharSignal::AlreadyPrinted(printed) = signal {
            signal = CharSignal::None;
            if printed == current_char {
                continue;
            }
        }

        let next_character = chars.peek().copied();

        match current_char {
            '\\' => match next_character {
                // A lone backslash at the very end is kept as-is.
                None => reduced_string.push(current_char),
                // This backslash is itself escaped by the previous one.
                Some(_) if signal == CharSignal::Keep => {
                    reduced_string.push(current_char);
                    signal = CharSignal::None;
                }
                // An escaped alternate quote no longer needs its escape.
                // Compared as `char`s: a narrowing cast to `u8` would make
                // e.g. U+0127 look like `'`.
                Some(next) if next == alternate => {
                    reduced_string.push(alternate);
                    signal = CharSignal::AlreadyPrinted(alternate);
                }
                // A known escape: keep the backslash and tell the next
                // iteration that its character is escaped.
                Some(next) if KEPT_ESCAPES.contains(next) => {
                    reduced_string.push(current_char);
                    signal = CharSignal::Keep;
                }
                // The backslash is redundant ("\a" reads the same as "a"):
                // drop it unless escapes must be preserved. The following
                // character is emitted by the next iteration.
                Some(_) => {
                    if is_escape_preserved {
                        reduced_string.push(current_char);
                    }
                }
            },
            // Collapse "\r\n" into "\n"; the upcoming '\n' is already emitted.
            '\r' if next_character == Some('\n') => {
                reduced_string.push('\n');
                signal = CharSignal::AlreadyPrinted('\n');
            }
            _ => {
                // `Keep` means the previous character was an escaping
                // backslash, so the quote is already escaped. Looking at the
                // last emitted character instead would mistake the second
                // backslash of `\\` for an escape.
                if current_char == preferred && signal != CharSignal::Keep {
                    reduced_string.push_str(preferred_quote.as_escaped());
                } else {
                    reduced_string.push(current_char);
                }
                signal = CharSignal::None;
            }
        }
    }

    if reduced_string == raw_content {
        Cow::Borrowed(raw_content)
    } else {
        Cow::Owned(reduced_string)
    }
}