use alloc::{
borrow::Cow,
string::String,
vec::Vec,
};
use crate::TrimMut;
/// Trim and normalize whitespace in a single operation.
///
/// The implementations in this file remove leading and trailing whitespace
/// and reduce every interior run of whitespace to one plain space
/// (`' '` for strings, `b' '` for bytes).
pub trait TrimNormal {
/// The type returned by [`TrimNormal::trim_and_normalize`]; each
/// implementation picks its own (a `Cow`, an owned value, or the same
/// mutable reference that was passed in).
type Normalized;
/// Consume `self` and return its trimmed, whitespace-normalized form.
fn trim_and_normalize(self) -> Self::Normalized;
}
/// Trim-and-normalize adapter for iterators of `char`.
///
/// `I` is the underlying iterator type that ends up wrapped inside the
/// returned [`TrimNormalIter`].
pub trait TrimNormalChars<I: Iterator<Item=char>> {
/// Consume the iterator and wrap it in a [`TrimNormalIter`] yielding `char`.
fn trim_and_normalize(self) -> TrimNormalIter<char, I>;
}
/// Blanket implementation: every iterator of `char` gets
/// `trim_and_normalize()`.
impl<I> TrimNormalChars<I> for I
where
    I: Iterator<Item = char>,
{
    /// Discard leading whitespace and wrap what remains.
    ///
    /// The first non-whitespace `char` (if any) has already been pulled out of
    /// the source by the time the adapter is built, so it is parked in the
    /// adapter's `next` slot to be yielded first.
    #[inline]
    fn trim_and_normalize(mut self) -> TrimNormalIter<char, I> {
        // `find` only borrows the iterator, leaving it positioned right after
        // the match (or exhausted when everything was whitespace).
        let first = self.find(|ch| !ch.is_whitespace());
        TrimNormalIter {
            iter: self,
            next: first,
        }
    }
}
/// Trim-and-normalize adapter for iterators of `u8`.
///
/// `I` is the underlying iterator type that ends up wrapped inside the
/// returned [`TrimNormalIter`].
pub trait TrimNormalBytes<I: Iterator<Item=u8>> {
/// Consume the iterator and wrap it in a [`TrimNormalIter`] yielding `u8`.
fn trim_and_normalize(self) -> TrimNormalIter<u8, I>;
}
/// Blanket implementation: every iterator of `u8` gets
/// `trim_and_normalize()`.
impl<I> TrimNormalBytes<I> for I
where
    I: Iterator<Item = u8>,
{
    /// Discard leading ASCII whitespace and wrap what remains.
    ///
    /// The first non-whitespace byte (if any) has already been pulled out of
    /// the source by the time the adapter is built, so it is parked in the
    /// adapter's `next` slot to be yielded first.
    #[inline]
    fn trim_and_normalize(mut self) -> TrimNormalIter<u8, I> {
        // `find` only borrows the iterator, leaving it positioned right after
        // the match (or exhausted when everything was whitespace).
        let first = self.find(|byte| !byte.is_ascii_whitespace());
        TrimNormalIter {
            iter: self,
            next: first,
        }
    }
}
/// Iterator adapter that trims and normalizes whitespace on the fly.
///
/// Interior whitespace runs are emitted as a single space and trailing
/// whitespace is dropped. Leading whitespace is skipped by the constructors
/// (`TrimNormalChars` / `TrimNormalBytes`) before the adapter is built.
///
/// The adapter deliberately does **not** implement `ExactSizeIterator`: the
/// number of items it yields depends on how much whitespace is still ahead,
/// which cannot be known without consuming the source, so `size_hint` can only
/// ever report a range.
#[derive(Debug, Clone)]
pub struct TrimNormalIter<T: Copy + Sized, I: Iterator<Item=T>> {
    /// Remaining, not-yet-inspected source items.
    iter: I,
    /// An item already pulled from `iter` that must be yielded before
    /// anything else.
    next: Option<T>,
}

// Generates the `Iterator` (and `FusedIterator`) implementation for one item
// type.
//
// * `$ty`    — item type (`char` or `u8`).
// * `$space` — the literal emitted in place of a whitespace run.
// * `$cmp`   — name of the "is whitespace" method on `$ty`.
macro_rules! iter {
    ($ty:ty, $space:literal, $cmp:ident) => (
        impl<I: Iterator<Item=$ty>> Iterator for TrimNormalIter<$ty, I> {
            type Item = $ty;

            fn next(&mut self) -> Option<Self::Item> {
                // A buffered item always goes out first.
                if let Some(next) = self.next.take() { return Some(next); }

                let next = self.iter.next()?;
                if next.$cmp() {
                    // Swallow the whole whitespace run. It only counts as a
                    // separator when something non-whitespace follows;
                    // otherwise it was trailing whitespace and we are done.
                    self.next = self.iter.by_ref().find(|c| ! c.$cmp());
                    if self.next.is_some() { Some($space) }
                    else { None }
                }
                else { Some(next) }
            }

            fn size_hint(&self) -> (usize, Option<usize>) {
                // Only the buffered item is guaranteed; everything left in
                // the source could still turn out to be trailing whitespace.
                let lower = usize::from(self.next.is_some());
                let (_, upper) = self.iter.size_hint();
                // Every yielded item consumes at least one source item, plus
                // the buffered one. If that sum does not fit in `usize` the
                // bound is unknown rather than a reason to overflow.
                (lower, upper.and_then(|n| n.checked_add(lower)))
            }
        }

        // Once the source is exhausted `next()` keeps returning `None` as
        // long as the source itself does.
        impl<I: Iterator<Item=$ty> + core::iter::FusedIterator> core::iter::FusedIterator for TrimNormalIter<$ty, I> {}
    );
}

iter!(char, ' ', is_whitespace);
iter!(u8, b' ', is_ascii_whitespace);
/// Trim and normalize a string slice, allocating only when the interior
/// actually needs rewriting.
impl<'a> TrimNormal for &'a str {
    type Normalized = Cow<'a, str>;

    /// Returns `Cow::Borrowed` when trimming alone is enough, otherwise a
    /// freshly built `Cow::Owned` with every whitespace run reduced to `' '`.
    fn trim_and_normalize(self) -> Self::Normalized {
        let trimmed = self.trim();

        // Whether the previously inspected character was whitespace.
        let mut prev_ws = true;

        for (idx, ch) in trimmed.char_indices() {
            if !ch.is_whitespace() {
                prev_ws = false;
                continue;
            }

            let repeated = prev_ws;
            prev_ws = true;

            // A lone plain space is already in normal form; keep scanning.
            if !repeated && ch == ' ' { continue; }

            // First offender found: everything before it is copied verbatim,
            // the rest is rebuilt character by character.
            let mut out = String::with_capacity(trimmed.len());
            out.push_str(&trimmed[..idx]);
            // A non-space whitespace that opens a run becomes a space; a
            // repeated whitespace is simply dropped.
            if !repeated { out.push(' '); }

            for rest in trimmed[idx + ch.len_utf8()..].chars() {
                if !rest.is_whitespace() {
                    prev_ws = false;
                    out.push(rest);
                }
                else if !prev_ws {
                    prev_ws = true;
                    out.push(' ');
                }
            }
            return Cow::Owned(out);
        }

        Cow::Borrowed(trimmed)
    }
}
/// Trim and normalize a `Cow<str>` by delegating to the `&str` or `String`
/// implementation, depending on which variant is held.
impl TrimNormal for Cow<'_, str> {
    type Normalized = Self;

    /// A borrowed input may stay borrowed; an owned input is normalized as a
    /// `String` and stays owned.
    #[inline]
    fn trim_and_normalize(self) -> Self::Normalized {
        let owned = match self {
            Cow::Borrowed(slice) => return slice.trim_and_normalize(),
            Cow::Owned(owned) => owned,
        };
        Cow::Owned(owned.trim_and_normalize())
    }
}
/// Trim and normalize a `String` in place.
impl TrimNormal for &mut String {
    type Normalized = Self;

    /// Works in two phases: first drop the unwanted characters, then turn the
    /// surviving non-space whitespace into plain spaces. Returns the same
    /// mutable reference that was passed in.
    fn trim_and_normalize(self) -> Self::Normalized {
        // Trailing whitespace goes first so the pass below never keeps a
        // separator at the very end.
        self.trim_end_mut();

        // Phase one: keep only the first character of each whitespace run
        // (and nothing from a leading run), counting the kept ones that are
        // not a plain space.
        let mut prev_ws = true;
        let mut pending = 0;
        self.retain(|ch| {
            if !ch.is_whitespace() {
                prev_ws = false;
                return true;
            }
            if prev_ws { return false; }
            prev_ws = true;
            if ch != ' ' { pending += 1; }
            true
        });

        // Phase two: replace each counted separator with `' '`, walking from
        // the back so earlier byte offsets stay valid while the string
        // shrinks.
        let mut limit = self.len();
        for _ in 0..pending {
            let mut width = 0;
            let Some(at) = self[..limit].rfind(|ch: char| {
                let hit = ch != ' ' && ch.is_whitespace();
                if hit { width = ch.len_utf8(); }
                hit
            })
            else { break };

            self.replace_range(at..at + width, " ");
            limit = at;
        }

        self
    }
}
impl<'a> TrimNormal for &'a String {
type Normalized = Cow<'a, str>;
#[inline]
fn trim_and_normalize(self) -> Self::Normalized {
<&str as TrimNormal>::trim_and_normalize(self.as_str())
}
}
impl TrimNormal for String {
type Normalized = Self;
#[inline]
fn trim_and_normalize(mut self) -> Self::Normalized {
<&mut Self as TrimNormal>::trim_and_normalize(&mut self);
self
}
}
/// Trim and normalize a byte slice (ASCII whitespace only), allocating only
/// when the interior actually needs rewriting.
impl<'a> TrimNormal for &'a [u8] {
    type Normalized = Cow<'a, [u8]>;

    /// Returns `Cow::Borrowed` when trimming alone is enough, otherwise a
    /// freshly built `Cow::Owned` with every whitespace run reduced to `b' '`.
    fn trim_and_normalize(self) -> Self::Normalized {
        let trimmed = self.trim_ascii();

        // Whether the previously inspected byte was whitespace.
        let mut prev_ws = true;

        for (idx, &byte) in trimmed.iter().enumerate() {
            if !byte.is_ascii_whitespace() {
                prev_ws = false;
                continue;
            }

            let repeated = prev_ws;
            prev_ws = true;

            // A lone plain space is already in normal form; keep scanning.
            if !repeated && byte == b' ' { continue; }

            // First offender found: everything before it is copied verbatim,
            // the rest is rebuilt byte by byte.
            let mut out = Vec::<u8>::with_capacity(trimmed.len());
            out.extend_from_slice(&trimmed[..idx]);
            // A non-space whitespace that opens a run becomes a space; a
            // repeated whitespace is simply dropped.
            if !repeated { out.push(b' '); }

            for &rest in &trimmed[idx + 1..] {
                if !rest.is_ascii_whitespace() {
                    prev_ws = false;
                    out.push(rest);
                }
                else if !prev_ws {
                    prev_ws = true;
                    out.push(b' ');
                }
            }
            return Cow::Owned(out);
        }

        Cow::Borrowed(trimmed)
    }
}
/// Trim and normalize a `Cow<[u8]>` by delegating to the `&[u8]` or `Vec<u8>`
/// implementation, depending on which variant is held.
impl TrimNormal for Cow<'_, [u8]> {
    type Normalized = Self;

    /// A borrowed input may stay borrowed; an owned input is normalized as a
    /// `Vec<u8>` and stays owned.
    #[inline]
    fn trim_and_normalize(self) -> Self::Normalized {
        let owned = match self {
            Cow::Borrowed(slice) => return slice.trim_and_normalize(),
            Cow::Owned(owned) => owned,
        };
        Cow::Owned(owned.trim_and_normalize())
    }
}
/// Trim and normalize a byte vector in place (ASCII whitespace only).
impl TrimNormal for &mut Vec<u8> {
    type Normalized = Self;

    /// A single `retain_mut` pass drops leading and repeated whitespace and
    /// rewrites each kept separator to `b' '`; a final trim removes the one
    /// separator that may be left at the end. Returns the same mutable
    /// reference that was passed in.
    fn trim_and_normalize(self) -> Self::Normalized {
        // Whether the previously inspected byte was whitespace. Starting out
        // `true` makes the leading run disappear entirely.
        let mut prev_ws = true;

        self.retain_mut(|byte| {
            let is_ws = byte.is_ascii_whitespace();
            // Only whitespace that follows whitespace is removed.
            let keep = !(is_ws && prev_ws);
            if is_ws && keep { *byte = b' '; }
            prev_ws = is_ws;
            keep
        });

        // Ending on whitespace means a trailing separator may have survived.
        if prev_ws { self.trim_end_mut(); }

        self
    }
}
impl TrimNormal for Vec<u8> {
type Normalized = Self;
#[inline]
fn trim_and_normalize(mut self) -> Self::Normalized {
<&mut Self as TrimNormal>::trim_and_normalize(&mut self);
self
}
}
#[cfg(test)]
mod test {
    use super::*;

    /// Inputs that only need trimming: the slice-based implementations must
    /// hand back a borrow, and every other implementation must agree on the
    /// content.
    #[test]
    fn trim_and_normalize_borrowed() {
        // ASCII-only cases, checked against both the string and byte APIs.
        for (raw, expected) in [
            ("", ""),
            (" ", ""),
            ("\n\r\x0C H E L L O\t\t", "H E L L O"),
        ] {
            let normal = raw.trim_and_normalize();
            assert_eq!(normal, expected);
            assert!(matches!(normal, Cow::Borrowed(_)));

            let normal = raw.as_bytes().trim_and_normalize();
            assert_eq!(normal, expected.as_bytes());
            assert!(matches!(normal, Cow::Borrowed(_)));

            let normal: String = String::from(raw).trim_and_normalize();
            assert_eq!(normal, expected);

            let normal: Vec<u8> = raw.as_bytes().to_vec().trim_and_normalize();
            assert_eq!(normal, expected.as_bytes());

            let normal: String = raw.chars().trim_and_normalize().collect();
            assert_eq!(normal, expected);

            let normal: Vec<u8> = raw.bytes().trim_and_normalize().collect();
            assert_eq!(normal, expected.as_bytes());
        }

        // Unicode whitespace is only meaningful to the string APIs.
        for (raw, expected) in [
            ("\u{2003}", ""),
            ("\u{2003}\u{2003}HEL LO\r\u{2003}", "HEL LO"),
        ] {
            let normal = raw.trim_and_normalize();
            assert_eq!(normal, expected);
            assert!(matches!(normal, Cow::Borrowed(_)));

            let normal: String = String::from(raw).trim_and_normalize();
            assert_eq!(normal, expected);

            let normal: String = raw.chars().trim_and_normalize().collect();
            assert_eq!(normal, expected);
        }

        // Every whitespace character there is, squeezed between brackets,
        // must collapse to exactly one space.
        let sandwich = core::iter::once('[')
            .chain(('\0'..=char::MAX).filter(|c| c.is_whitespace()))
            .chain(core::iter::once(']'))
            .collect::<String>();
        assert_eq!(sandwich.as_str().trim_and_normalize(), "[ ]");
        assert_eq!(sandwich.trim_and_normalize(), "[ ]");

        let sandwich = core::iter::once('[')
            .chain(('\0'..=char::MAX).filter(|c| c.is_whitespace()))
            .chain(core::iter::once(']'))
            .trim_and_normalize()
            .collect::<String>();
        assert_eq!(sandwich, "[ ]");
    }

    /// Inputs whose interior needs rewriting: the slice-based implementations
    /// must allocate, and every other implementation must agree on the
    /// content.
    #[test]
    fn trim_and_normalize_owned() {
        // ASCII-only cases, checked against both the string and byte APIs.
        for (raw, expected) in [
            // Two interior spaces: a single one would already be normal and
            // come back borrowed, failing the `Cow::Owned` checks below.
            ("H  I", "H I"),
            ("H\tI", "H I"),
            ("H\tE L\n\rL\x0CO ", "H E L L O"),
        ] {
            let normal = raw.trim_and_normalize();
            assert_eq!(normal, expected);
            assert!(matches!(normal, Cow::Owned(_)));

            let normal = raw.as_bytes().trim_and_normalize();
            assert_eq!(normal, expected.as_bytes());
            assert!(matches!(normal, Cow::Owned(_)));

            let normal: String = String::from(raw).trim_and_normalize();
            assert_eq!(normal, expected);

            let normal: Vec<u8> = raw.as_bytes().to_vec().trim_and_normalize();
            assert_eq!(normal, expected.as_bytes());

            let normal: String = raw.chars().trim_and_normalize().collect();
            assert_eq!(normal, expected);

            let normal: Vec<u8> = raw.bytes().trim_and_normalize().collect();
            assert_eq!(normal, expected.as_bytes());
        }

        // Unicode whitespace is only meaningful to the string APIs.
        for (raw, expected) in [
            ("H\u{2003}I", "H I"),
            ("\u{2003}\u{2003}HEL\u{2003} LO\r\u{2003}", "HEL LO"),
        ] {
            let normal = raw.trim_and_normalize();
            assert_eq!(normal, expected);
            assert!(matches!(normal, Cow::Owned(_)));

            let normal: String = String::from(raw).trim_and_normalize();
            assert_eq!(normal, expected);

            let normal: String = raw.chars().trim_and_normalize().collect();
            assert_eq!(normal, expected);
        }
    }
}