#[cfg(feature = "unicode-linebreak")]
use crate::core::skip_ansi_escape_sequence;
use crate::core::Word;
/// Describes where the words of a line of text begin and end.
///
/// An implementation splits a line into [`Word`]s through
/// [`find_words`](WordSeparator::find_words). The supertraits
/// [`WordSeparatorClone`] and [`std::fmt::Debug`] make it possible to clone
/// and debug-print a separator that is only known as a trait object.
pub trait WordSeparator: WordSeparatorClone + std::fmt::Debug {
/// Split `line` into words.
///
/// Returns a boxed iterator of [`Word`]s; both the iterator and the words
/// borrow from `line` for the lifetime `'a`.
fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a>;
}
/// Helper trait used to make `Box<dyn WordSeparator>` cloneable.
///
/// It is a supertrait of [`WordSeparator`] and is implemented by a blanket
/// impl for every `WordSeparator + Clone + 'static` type, so it is hidden
/// from the generated documentation.
#[doc(hidden)]
pub trait WordSeparatorClone {
/// Produce a new boxed `WordSeparator`; the blanket implementation does so
/// by cloning `self`.
fn clone_box(&self) -> Box<dyn WordSeparator>;
}
impl<T: WordSeparator + Clone + 'static> WordSeparatorClone for T {
    /// Clone `self` and hand the copy back as a boxed trait object.
    fn clone_box(&self) -> Box<dyn WordSeparator> {
        let duplicate: T = <T as Clone>::clone(self);
        Box::new(duplicate)
    }
}
impl Clone for Box<dyn WordSeparator> {
    /// Clone the boxed separator by delegating to the trait object's
    /// `clone_box` method.
    fn clone(&self) -> Box<dyn WordSeparator> {
        // Dereference down to the `dyn WordSeparator` itself before calling
        // `clone_box`: invoked on the box, the call would resolve to the
        // blanket impl for `Box<dyn WordSeparator>`, which calls back into
        // this `clone`.
        (**self).clone_box()
    }
}
impl WordSeparator for Box<dyn WordSeparator> {
    /// Forward to the `find_words` implementation of the boxed separator.
    fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
        // Explicitly reach through the box so the call dispatches on the
        // inner trait object rather than on this impl again.
        (**self).find_words(line)
    }
}
/// Word separator that finds words by looking for the ASCII space
/// character `' '`.
///
/// Runs of spaces stay attached to the end of the word that precedes them;
/// no other character is treated as whitespace.
#[derive(Clone, Copy, Debug, Default)]
pub struct AsciiSpace;
impl WordSeparator for AsciiSpace {
    /// Split `line` at every transition from `' '` to a non-space character.
    ///
    /// Each yielded word is a contiguous slice of `line` that runs up to
    /// (but not including) the next such transition, so trailing spaces
    /// belong to the word before them and leading spaces form a word of
    /// their own. An empty `line` yields no words.
    fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
        let mut chars = line.char_indices();
        // Byte offset at which the word currently being scanned begins.
        let mut word_start = 0;
        // Whether the previously inspected character was a space.
        let mut prev_was_space = false;

        Box::new(std::iter::from_fn(move || {
            // `by_ref` keeps the iterator alive across calls so scanning
            // resumes right after the character that ended the last word.
            for (pos, ch) in chars.by_ref() {
                let is_space = ch == ' ';
                let at_boundary = prev_was_space && !is_space;
                prev_was_space = is_space;
                if at_boundary {
                    let piece = &line[word_start..pos];
                    word_start = pos;
                    return Some(Word::from(piece));
                }
            }

            // Input exhausted: emit whatever is left exactly once.
            if word_start >= line.len() {
                return None;
            }
            let tail = &line[word_start..];
            word_start = line.len();
            Some(Word::from(tail))
        }))
    }
}
/// Word separator that finds words from the line break opportunities
/// reported by `unicode_linebreak::linebreaks`.
///
/// The break opportunities are computed on a copy of the line with ANSI
/// escape sequences removed; opportunities directly after `-` or a soft
/// hyphen are not used. Only available with the `unicode-linebreak`
/// Cargo feature.
#[cfg(feature = "unicode-linebreak")]
#[derive(Clone, Copy, Debug, Default)]
pub struct UnicodeBreakProperties;
#[cfg(feature = "unicode-linebreak")]
impl WordSeparator for UnicodeBreakProperties {
    /// Split `line` at the break opportunities found by
    /// `unicode_linebreak::linebreaks`.
    ///
    /// The break opportunities are computed on a copy of `line` with ANSI
    /// escape sequences stripped, then mapped back to byte offsets in the
    /// original `line` so every yielded [`Word`] is a contiguous slice of
    /// it. Opportunities directly after `-` or a soft hyphen are ignored.
    /// The break at the very end of the text is never used as a split
    /// point: the remainder of `line` (including any trailing escape
    /// sequence) is emitted as the last word instead.
    fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
        // Iterator over `(original index, stripped index)` pairs. Breaks are
        // found on the stripped string, but words must be sliced out of the
        // original one, so a mapping between the two is needed.
        let mut last_stripped_idx = 0;
        let mut char_indices = line.char_indices();
        let mut idx_map = std::iter::from_fn(move || match char_indices.next() {
            Some((orig_idx, ch)) => {
                let stripped_idx = last_stripped_idx;
                // Characters belonging to an escape sequence do not advance
                // the stripped index.
                if !skip_ansi_escape_sequence(ch, &mut char_indices.by_ref().map(|(_, ch)| ch)) {
                    last_stripped_idx += ch.len_utf8();
                }
                Some((orig_idx, stripped_idx))
            }
            None => None,
        });

        let stripped = strip_ansi_escape_sequences(line);
        // Collected eagerly because the filter borrows the local `stripped`,
        // which cannot outlive this function.
        let mut opportunities = unicode_linebreak::linebreaks(&stripped)
            .map(|(idx, _)| idx)
            .filter(|&idx| {
                // Drop the end-of-text break by index. It used to be removed
                // with `next_back()` after filtering, which discarded a real
                // break whenever the text ended in `-` or a soft hyphen
                // (the end-of-text break had already been filtered out).
                if idx >= stripped.len() {
                    return false;
                }
                // Written as a `match` (not `matches!`) as in the original
                // code — presumably to support older toolchains.
                #[allow(clippy::match_like_matches_macro)]
                match stripped[..idx].chars().next_back() {
                    // Breaks after '-' are left to the word splitter; soft
                    // hyphens are not handled as break points here.
                    Some('-') | Some(SHY) => false,
                    _ => true,
                }
            })
            .collect::<Vec<_>>()
            .into_iter();

        let mut start = 0;
        Box::new(std::iter::from_fn(move || {
            for idx in opportunities.by_ref() {
                // Translate the stripped offset to the first original offset
                // that maps to it; `idx_map` is consumed monotonically.
                if let Some((orig_idx, _)) = idx_map.find(|&(_, stripped_idx)| stripped_idx == idx)
                {
                    let word = Word::from(&line[start..orig_idx]);
                    start = orig_idx;
                    return Some(word);
                }
            }

            // Emit the remainder of the original line exactly once.
            if start < line.len() {
                let word = Word::from(&line[start..]);
                start = line.len();
                return Some(word);
            }

            None
        }))
    }
}
/// Soft hyphen (U+00AD). Break opportunities directly after this character
/// are filtered out by `UnicodeBreakProperties::find_words`.
#[cfg(feature = "unicode-linebreak")]
const SHY: char = '\u{00ad}';
/// Return a copy of `text` without the characters that
/// `skip_ansi_escape_sequence` identifies as part of an ANSI escape sequence.
///
/// The helper is handed the character iterator itself, so it presumably
/// consumes the remainder of a sequence once it recognizes its first
/// character.
#[cfg(feature = "unicode-linebreak")]
fn strip_ansi_escape_sequences(text: &str) -> String {
    let mut remaining = text.chars();
    // The stripped text can never be longer than the input.
    let mut visible = String::with_capacity(text.len());
    while let Some(ch) = remaining.next() {
        if !skip_ansi_escape_sequence(ch, &mut remaining) {
            visible.push(ch);
        }
    }
    visible
}
// Unit tests for the word separators defined in this module.
#[cfg(test)]
mod tests {
use super::*;
// Like `assert_eq!`, but the left-hand side is an iterator which is first
// collected into a `Vec`.
macro_rules! assert_iter_eq {
($left:expr, $right:expr) => {
assert_eq!($left.collect::<Vec<_>>(), $right);
};
}
// An empty line contains no words.
#[test]
fn ascii_space_empty() {
assert_iter_eq!(AsciiSpace.find_words(""), vec![]);
}
#[test]
fn ascii_space_single_word() {
assert_iter_eq!(AsciiSpace.find_words("foo"), vec![Word::from("foo")]);
}
// The separating space stays attached to the preceding word.
#[test]
fn ascii_space_two_words() {
assert_iter_eq!(
AsciiSpace.find_words("foo bar"),
vec![Word::from("foo "), Word::from("bar")]
);
}
#[test]
fn ascii_space_multiple_words() {
assert_iter_eq!(
AsciiSpace.find_words("foo bar baz"),
vec![Word::from("foo "), Word::from("bar "), Word::from("baz")]
);
}
// A line consisting only of whitespace is returned as a single word.
#[test]
fn ascii_space_only_whitespace() {
assert_iter_eq!(AsciiSpace.find_words(" "), vec![Word::from(" ")]);
}
// NOTE(review): as written, this input is identical to the one in
// `ascii_space_two_words`; a run of several spaces between the words was
// presumably intended — confirm against the upstream test.
#[test]
fn ascii_space_inter_word_whitespace() {
assert_iter_eq!(
AsciiSpace.find_words("foo bar"),
vec![Word::from("foo "), Word::from("bar")]
)
}
#[test]
fn ascii_space_trailing_whitespace() {
assert_iter_eq!(AsciiSpace.find_words("foo "), vec![Word::from("foo ")]);
}
// Leading whitespace becomes a word of its own.
#[test]
fn ascii_space_leading_whitespace() {
assert_iter_eq!(
AsciiSpace.find_words(" foo"),
vec![Word::from(" "), Word::from("foo")]
);
}
// A single non-ASCII character (U+1F920) is returned as one word.
#[test]
fn ascii_space_multi_column_char() {
assert_iter_eq!(
AsciiSpace.find_words("\u{1f920}"), vec![Word::from("\u{1f920}")]
);
}
// Hyphens never separate words for `AsciiSpace`; only spaces do.
#[test]
fn ascii_space_hyphens() {
assert_iter_eq!(
AsciiSpace.find_words("foo-bar"),
vec![Word::from("foo-bar")]
);
assert_iter_eq!(
AsciiSpace.find_words("foo- bar"),
vec![Word::from("foo- "), Word::from("bar")]
);
assert_iter_eq!(
AsciiSpace.find_words("foo - bar"),
vec![Word::from("foo "), Word::from("- "), Word::from("bar")]
);
assert_iter_eq!(
AsciiSpace.find_words("foo -bar"),
vec![Word::from("foo "), Word::from("-bar")]
);
}
// Words wrapped in termion foreground-color codes: the color codes are
// expected to stay attached to the word they surround, for both separators.
// Only built on Unix because of the `termion` dependency.
#[test]
#[cfg(unix)]
fn ascii_space_colored_text() {
use termion::color::{Blue, Fg, Green, Reset};
let green_hello = format!("{}Hello{} ", Fg(Green), Fg(Reset));
let blue_world = format!("{}World!{}", Fg(Blue), Fg(Reset));
assert_iter_eq!(
AsciiSpace.find_words(&format!("{}{}", green_hello, blue_world)),
vec![Word::from(&green_hello), Word::from(&blue_world)]
);
#[cfg(feature = "unicode-linebreak")]
assert_iter_eq!(
UnicodeBreakProperties.find_words(&format!("{}{}", green_hello, blue_world)),
vec![Word::from(&green_hello), Word::from(&blue_world)]
);
}
// Escape sequences in the middle of a word must not split the word.
#[test]
fn ascii_space_color_inside_word() {
let text = "foo\u{1b}[0m\u{1b}[32mbar\u{1b}[0mbaz";
assert_iter_eq!(AsciiSpace.find_words(&text), vec![Word::from(text)]);
#[cfg(feature = "unicode-linebreak")]
assert_iter_eq!(
UnicodeBreakProperties.find_words(&text),
vec![Word::from(text)]
);
}
}