use core::cmp;
use core::iter::Filter;
use crate::tables::word::WordCat;
/// Iterator over the words of a string, yielding each word as a `&str`.
///
/// It wraps one of two concrete iterators (an ASCII one or a general Unicode
/// one, see `WordsIter`) and forwards every call to whichever is stored.
#[derive(Debug)]
pub struct UnicodeWords<'a> {
    inner: WordsIter<'a>,
}

impl<'a> Iterator for UnicodeWords<'a> {
    type Item = &'a str;

    /// Yields the next word from the front of the remaining input.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        match self.inner {
            WordsIter::Ascii(ref mut ascii) => ascii.next(),
            WordsIter::Unicode(ref mut general) => general.next(),
        }
    }

    /// Reports the size hint of the wrapped iterator unchanged.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        match self.inner {
            WordsIter::Ascii(ref ascii) => ascii.size_hint(),
            WordsIter::Unicode(ref general) => general.size_hint(),
        }
    }
}

impl<'a> DoubleEndedIterator for UnicodeWords<'a> {
    /// Yields the next word from the back of the remaining input.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        match self.inner {
            WordsIter::Ascii(ref mut ascii) => ascii.next_back(),
            WordsIter::Unicode(ref mut general) => general.next_back(),
        }
    }
}
/// Iterator over the words of a string together with their byte offsets,
/// yielding `(offset, word)` pairs.
///
/// It wraps one of two concrete iterators (an ASCII one or a general Unicode
/// one, see `IndicesIter`) and forwards every call to whichever is stored.
#[derive(Debug)]
pub struct UnicodeWordIndices<'a> {
    inner: IndicesIter<'a>,
}

impl<'a> Iterator for UnicodeWordIndices<'a> {
    type Item = (usize, &'a str);

    /// Yields the next `(offset, word)` pair from the front.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        match self.inner {
            IndicesIter::Ascii(ref mut ascii) => ascii.next(),
            IndicesIter::Unicode(ref mut general) => general.next(),
        }
    }

    /// Reports the size hint of the wrapped iterator unchanged.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        match self.inner {
            IndicesIter::Ascii(ref ascii) => ascii.size_hint(),
            IndicesIter::Unicode(ref general) => general.size_hint(),
        }
    }
}

impl<'a> DoubleEndedIterator for UnicodeWordIndices<'a> {
    /// Yields the next `(offset, word)` pair from the back.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        match self.inner {
            IndicesIter::Ascii(ref mut ascii) => ascii.next_back(),
            IndicesIter::Unicode(ref mut general) => general.next_back(),
        }
    }
}
/// Double-ended iterator that splits a string into consecutive segments at
/// word boundaries, yielding every segment (words, whitespace, punctuation, ...)
/// as a `&str`.
#[derive(Debug, Clone)]
pub struct UWordBounds<'a> {
// Part of the input that has not been yielded yet; `next` trims it from the
// front and `next_back` trims it from the back.
string: &'a str,
// Word category that `next` already looked up for the first character of
// `string` (set when a segment ended just before that character), so the
// following `next` call can reuse it instead of querying the table again.
cat: Option<WordCat>,
// Same cache as `cat`, but kept by `next_back` for the last character of
// `string`.
catb: Option<WordCat>,
}
/// Double-ended iterator over the word-boundary segments of a string that
/// also reports where each segment starts, as `(byte offset, segment)`.
#[derive(Debug, Clone)]
pub struct UWordBoundIndices<'a> {
    /// Address of the first byte of the original input, stored as an integer.
    start_offset: usize,
    /// The segment iterator doing the actual splitting.
    iter: UWordBounds<'a>,
}

impl<'a> UWordBoundIndices<'a> {
    /// Returns the part of the input that has not been yielded yet.
    #[inline]
    pub fn as_str(&self) -> &'a str {
        let UWordBoundIndices { iter, .. } = self;
        iter.as_str()
    }
}

impl<'a> Iterator for UWordBoundIndices<'a> {
    type Item = (usize, &'a str);

    /// Yields the next segment from the front, paired with its byte offset.
    #[inline]
    fn next(&mut self) -> Option<(usize, &'a str)> {
        let segment = self.iter.next()?;
        // The offset is the distance between the segment's address and the
        // address of the start of the original input.
        let offset = segment.as_ptr() as usize - self.start_offset;
        Some((offset, segment))
    }

    /// Same bounds as the wrapped segment iterator.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let UWordBoundIndices { iter, .. } = self;
        iter.size_hint()
    }
}

impl<'a> DoubleEndedIterator for UWordBoundIndices<'a> {
    /// Yields the next segment from the back, paired with its byte offset.
    #[inline]
    fn next_back(&mut self) -> Option<(usize, &'a str)> {
        let segment = self.iter.next_back()?;
        let offset = segment.as_ptr() as usize - self.start_offset;
        Some((offset, segment))
    }
}
/// State of the scanner used by `UWordBounds::next` and
/// `UWordBounds::next_back` while it grows the segment being built.
///
/// Most variants simply record the word category of the last character that
/// was accepted into the segment.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum UWordBoundsState {
// No character has been examined yet.
Start,
// Last accepted character had category `WC_ALetter`.
Letter,
// Last accepted character had category `WC_Hebrew_Letter`.
HLetter,
// Last accepted character had category `WC_Numeric`.
Numeric,
// Last accepted character had category `WC_Katakana`.
Katakana,
// Last accepted character had category `WC_ExtendNumLet`.
ExtendNumLet,
// Inside a run of `WC_Regional_Indicator` characters; the payload says how
// much of a pair has been collected.
Regional(RegionalState),
// A tentative character (or a run of Extend/Format/ZWJ characters) has been
// seen; the payload says what must come next for it to stay in the segment.
FormatExtend(FormatExtendType),
// Forward scan: the segment started with a `WC_ZWJ` character.
// Backward scan: an emoji was just accepted, so a ZWJ directly before it
// still belongs to the segment.
Zwj,
// Forward scan: an emoji directly following a ZWJ was accepted.
Emoji,
// Last accepted character had category `WC_WSegSpace`.
WSegSpace,
}
/// What the scanner is waiting for while it is in
/// `UWordBoundsState::FormatExtend`.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum FormatExtendType {
// Backward scan only: Extend/Format/ZWJ characters were seen at the very end
// of the input; the next character is handled exactly as in `Start`.
AcceptAny,
// A run of Extend/Format/ZWJ characters is being skipped; nothing else may
// join the pending characters.
AcceptNone,
// A mid-letter character or single quote was seen next to a letter; it is
// kept only if a letter is found on its other side.
RequireLetter,
// A double quote was seen next to a Hebrew letter; it is kept only if a
// Hebrew letter is found on its other side.
RequireHLetter,
// A single quote was seen in a position where it may attach to a Hebrew
// letter; see the `AcceptQLetter` arms in `next` / `next_back` for the
// letters that are accepted in each direction.
AcceptQLetter,
// A mid-number character or single quote was seen next to a digit; it is
// kept only if a digit is found on its other side.
RequireNumeric,
}
/// Progress through a pair of regional-indicator characters.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum RegionalState {
// One regional indicator has been accepted; a second one may still join it.
Half,
// Two regional indicators have been accepted; the segment is complete.
Full,
// Backward scan only: a regional indicator was accepted but it is not yet
// known whether it is the first or second of a pair. `next_back` resolves
// this by counting the regional indicators that precede it.
Unknown,
}
/// Returns `true` when the crate's emoji table classifies `ch` as
/// `EC_Extended_Pictographic`.
fn is_emoji(ch: char) -> bool {
    use crate::tables::emoji;

    let category = emoji::emoji_category(ch).2;
    category == emoji::EmojiCat::EC_Extended_Pictographic
}
impl<'a> Iterator for UWordBounds<'a> {
type Item = &'a str;
/// Bounds on the number of segments left: at least one when any input
/// remains, and at most one per remaining byte.
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let slen = self.string.len();
(cmp::min(slen, 1), Some(slen))
}
/// Splits the next segment off the front of the remaining input.
///
/// Walks the characters left to right, driving a small state machine over
/// their word categories, until it finds the first position where a word
/// boundary must be placed. Returns `None` once the input is exhausted.
#[inline]
fn next(&mut self) -> Option<&'a str> {
use self::FormatExtendType::*;
use self::UWordBoundsState::*;
use crate::tables::word as wd;
if self.string.is_empty() {
return None;
}
// `take_curr`: whether the character at `idx` belongs to the segment.
// `take_cat`: whether `cat` should be cached for the next call when the
// character at `idx` is left behind.
let mut take_curr = true;
let mut take_cat = true;
let mut idx = 0;
// `saveidx` / `savecat` remember a tentative mid-word character so the scan
// can be rewound to it if the required follower never shows up.
let mut saveidx = 0;
let mut state = Start;
let mut cat = wd::WC_Any;
let mut savecat = wd::WC_Any;
// Set once any Extend/Format/ZWJ character has been skipped; it stops a
// whitespace run from continuing across such characters.
let mut skipped_format_extend = false;
for (curr, ch) in self.string.char_indices() {
idx = curr;
// `cat` still holds the previous character's category here, including
// characters that were skipped below.
let prev_zwj = cat == wd::WC_ZWJ;
// Use the category cached by an earlier step if there is one, otherwise
// look it up.
cat = match self.cat {
None => wd::word_category(ch).2,
_ => self.cat.take().unwrap(),
};
take_cat = true;
// Once the segment has started, Extend/Format/ZWJ characters are absorbed
// without changing the state (UAX #29 rule WB4). In `Start` they are left
// for the state machine below.
if state != Start {
match cat {
wd::WC_Extend | wd::WC_Format | wd::WC_ZWJ => {
skipped_format_extend = true;
continue;
}
_ => {}
}
}
// An emoji directly after a ZWJ always stays attached (rule WB3c). This is
// checked outside the state enum because skipped ZWJs do not alter `state`.
if prev_zwj && is_emoji(ch) {
state = Emoji;
continue;
}
// NOTE: any `continue` inside this match must leave `cat` / `self.cat`
// consistent for the next iteration.
state = match state {
// CR as first character: the segment is CR alone, or CR LF when an LF
// follows (LF is one byte long, hence `+ 1`).
Start if cat == wd::WC_CR => {
idx += match self.get_next_cat(idx) {
Some(wd::WC_LF) => 1, _ => 0,
};
break; }
// First character: pick the state from its category. LF / Newline form a
// segment on their own. Any other category also forms a segment on its
// own unless Extend/Format/ZWJ characters follow, in which case those are
// collected with it (the looked-up category is cached in `self.cat` so the
// next iteration reuses it).
Start => match cat {
wd::WC_ALetter => Letter, wd::WC_Hebrew_Letter => HLetter, wd::WC_Numeric => Numeric, wd::WC_Katakana => Katakana, wd::WC_ExtendNumLet => ExtendNumLet, wd::WC_Regional_Indicator => Regional(RegionalState::Half), wd::WC_LF | wd::WC_Newline => break, wd::WC_ZWJ => Zwj, wd::WC_WSegSpace => WSegSpace, _ => {
if let Some(ncat) = self.get_next_cat(idx) {
if ncat == wd::WC_Format || ncat == wd::WC_Extend || ncat == wd::WC_ZWJ
{
state = FormatExtend(AcceptNone);
self.cat = Some(ncat);
continue;
}
}
break; }
},
// A whitespace run continues only with more WSegSpace and only if no
// Extend/Format/ZWJ character was skipped in between.
WSegSpace => match cat {
wd::WC_WSegSpace if !skipped_format_extend => WSegSpace,
_ => {
take_curr = false;
break;
}
},
// The ZWJ + emoji case was handled above; anything else ends the segment.
Zwj => {
take_curr = false;
break;
}
// After a letter: letters, digits and ExtendNumLet continue the word.
// Mid-word punctuation is accepted tentatively and its position saved so
// it can be rolled back. A single quote after a Hebrew letter is accepted
// without saving a rollback point.
Letter | HLetter => match cat {
wd::WC_ALetter => Letter, wd::WC_Hebrew_Letter => HLetter, wd::WC_Numeric => Numeric, wd::WC_ExtendNumLet => ExtendNumLet, wd::WC_Double_Quote if state == HLetter => {
savecat = cat;
saveidx = idx;
FormatExtend(RequireHLetter) }
wd::WC_Single_Quote if state == HLetter => {
FormatExtend(AcceptQLetter) }
wd::WC_MidLetter | wd::WC_MidNumLet | wd::WC_Single_Quote => {
savecat = cat;
saveidx = idx;
FormatExtend(RequireLetter) }
_ => {
take_curr = false;
break;
}
},
// After a digit: digits, letters and ExtendNumLet continue the word; a
// mid-number separator is accepted tentatively.
Numeric => match cat {
wd::WC_Numeric => Numeric, wd::WC_ALetter => Letter, wd::WC_Hebrew_Letter => HLetter, wd::WC_ExtendNumLet => ExtendNumLet, wd::WC_MidNum | wd::WC_MidNumLet | wd::WC_Single_Quote => {
savecat = cat;
saveidx = idx;
FormatExtend(RequireNumeric) }
_ => {
take_curr = false;
break;
}
},
// Katakana continues only with Katakana or ExtendNumLet.
Katakana => match cat {
wd::WC_Katakana => Katakana, wd::WC_ExtendNumLet => ExtendNumLet, _ => {
take_curr = false;
break;
}
},
// ExtendNumLet joins letters, digits, Katakana and more ExtendNumLet.
ExtendNumLet => match cat {
wd::WC_ExtendNumLet => ExtendNumLet, wd::WC_ALetter => Letter, wd::WC_Hebrew_Letter => HLetter, wd::WC_Numeric => Numeric, wd::WC_Katakana => Katakana, _ => {
take_curr = false;
break;
}
},
// A complete regional-indicator pair accepts nothing further (skipped
// Extend/Format/ZWJ characters never reach this match).
Regional(RegionalState::Full) => {
take_curr = false;
break;
}
// A single regional indicator pairs up with exactly one more.
Regional(RegionalState::Half) => match cat {
wd::WC_Regional_Indicator => Regional(RegionalState::Full), _ => {
take_curr = false;
break;
}
},
// `next` only ever creates `Half` and `Full`.
Regional(_) => {
unreachable!("RegionalState::Unknown should not occur on forward iteration")
}
// ZWJ + emoji continuation was handled above; reaching this arm means the
// emoji sequence is over.
Emoji => {
take_curr = false;
break;
}
// Resolve a tentative character: if the required category follows, the
// word continues. For `AcceptNone` / `AcceptQLetter` the segment ends
// before the current character and its category is not cached. For the
// `Require*` kinds the plain `break` leaves the state unchanged so the
// rewind after the loop kicks in.
FormatExtend(t) => match t {
RequireNumeric if cat == wd::WC_Numeric => Numeric, RequireLetter | AcceptQLetter if cat == wd::WC_ALetter => Letter, RequireLetter | AcceptQLetter if cat == wd::WC_Hebrew_Letter => HLetter, RequireHLetter if cat == wd::WC_Hebrew_Letter => HLetter, AcceptNone | AcceptQLetter => {
take_curr = false; take_cat = false;
break;
}
_ => break, },
}
}
// Still waiting for a required follower (either the loop broke on a
// mismatch or the input ran out): rewind to the tentative character so the
// segment ends just before it, and cache its category.
if let FormatExtend(t) = state {
if t == RequireLetter || t == RequireHLetter || t == RequireNumeric {
idx = saveidx;
cat = savecat;
take_curr = false;
}
}
// When the current character is part of the segment, move `idx` past it.
// Otherwise it becomes the first character of the remainder, and its
// category is cached if `take_cat` allows.
self.cat = if take_curr {
idx = idx + self.string[idx..].chars().next().unwrap().len_utf8();
None
} else if take_cat {
Some(cat)
} else {
None
};
let retstr = &self.string[..idx];
self.string = &self.string[idx..];
Some(retstr)
}
}
impl<'a> DoubleEndedIterator for UWordBounds<'a> {
/// Splits the next segment off the back of the remaining input.
///
/// Mirrors `next`, but walks the characters right to left. Returns `None`
/// once the input is exhausted.
#[inline]
fn next_back(&mut self) -> Option<&'a str> {
use self::FormatExtendType::*;
use self::UWordBoundsState::*;
use crate::tables::word as wd;
if self.string.is_empty() {
return None;
}
// `take_curr`: whether the character at `idx` belongs to the segment.
// `take_cat`: whether `cat` should be cached for the next call when the
// character at `idx` is left behind.
let mut take_curr = true;
let mut take_cat = true;
// `idx` starts at the byte offset of the last character.
let mut idx = self.string.len();
idx -= self.string.chars().next_back().unwrap().len_utf8();
// `previdx` is the offset of the character handled in the previous
// iteration (the one to the right of `idx`); `saveidx` / `savestate` hold a
// rollback point.
let mut previdx = idx;
let mut saveidx = idx;
let mut state = Start;
let mut savestate = Start;
let mut cat = wd::WC_Any;
// Whether the closest character to the right that is neither Extend nor
// Format is an emoji.
let mut right_significant_is_emoji: bool = false;
// Set once a run of Extend/Format/ZWJ characters has been skipped; it stops
// a whitespace run from continuing across such characters.
let mut skipped_format_extend = false;
for (curr, ch) in self.string.char_indices().rev() {
previdx = idx;
idx = curr;
// Use the category cached by an earlier call if there is one, otherwise
// look it up.
cat = match self.catb {
None => wd::word_category(ch).2,
_ => self.catb.take().unwrap(),
};
take_cat = true;
// A ZWJ whose significant right-hand neighbour is an emoji stays attached
// to it (rule WB3c seen from the right); just step over it.
if cat == wd::WC_ZWJ && state != Zwj && right_significant_is_emoji {
continue;
}
if cat != wd::WC_Extend && cat != wd::WC_Format {
right_significant_is_emoji = is_emoji(ch);
}
// Extend/Format/ZWJ characters attach to whatever precedes them, which has
// not been seen yet when scanning backwards. Park the real state, remember
// where the run began, and skip the run in `FormatExtend(AcceptNone)`.
if cat == wd::WC_Extend || cat == wd::WC_Format || (cat == wd::WC_ZWJ && state != Zwj) {
if !matches!(state, FormatExtend(_) | Start) {
saveidx = previdx;
savestate = state;
state = FormatExtend(AcceptNone);
}
if state != Start {
continue;
}
} else if state == FormatExtend(AcceptNone) {
// The skipped run ended at a significant character: restore the parked
// state and judge this character as if the run had not been there.
state = savestate;
previdx = saveidx;
take_cat = false;
skipped_format_extend = true;
}
// NOTE: any `continue` inside this match must leave `cat` / `self.catb`
// consistent for the next iteration.
state = match state {
// First significant character (possibly after trailing Extend/Format/ZWJ
// characters, which put the scan in `AcceptAny`).
Start | FormatExtend(AcceptAny) => match cat {
wd::WC_ALetter => Letter, wd::WC_Hebrew_Letter => HLetter, wd::WC_Numeric => Numeric, wd::WC_Katakana => Katakana, wd::WC_ExtendNumLet => ExtendNumLet, wd::WC_Regional_Indicator => Regional(RegionalState::Unknown), wd::WC_Extend | wd::WC_Format | wd::WC_ZWJ => FormatExtend(AcceptAny),
// A trailing single quote may belong to a Hebrew letter before it.
wd::WC_Single_Quote => {
saveidx = idx;
FormatExtend(AcceptQLetter) }
wd::WC_WSegSpace => WSegSpace,
// Line breaks: at the very end, LF pulls in a preceding CR (one byte);
// after trailing Extend/Format/ZWJ characters the line break is left for
// the next call.
wd::WC_CR | wd::WC_LF | wd::WC_Newline => {
if state == Start {
if cat == wd::WC_LF {
idx -= match self.get_prev_cat(idx) {
Some(wd::WC_CR) => 1, _ => 0,
};
}
} else {
take_curr = false;
}
break; }
// An emoji: a ZWJ directly before it would join the segment.
_ if is_emoji(ch) => Zwj,
_ => break, },
// After an emoji, only a ZWJ extends the segment to the left.
Zwj => match cat {
wd::WC_ZWJ => FormatExtend(AcceptAny),
_ => {
take_curr = false;
break;
}
},
// A whitespace run continues only with more WSegSpace and only if no
// Extend/Format/ZWJ run was skipped in between.
WSegSpace => match cat {
wd::WC_WSegSpace if !skipped_format_extend => WSegSpace,
_ => {
take_curr = false;
break;
}
},
// Before a letter: letters, digits and ExtendNumLet continue the word;
// mid-word punctuation is accepted tentatively, remembering the boundary
// to fall back to (`previdx`).
Letter | HLetter => match cat {
wd::WC_ALetter => Letter, wd::WC_Hebrew_Letter => HLetter, wd::WC_Numeric => Numeric, wd::WC_ExtendNumLet => ExtendNumLet, wd::WC_Double_Quote if state == HLetter => {
saveidx = previdx;
FormatExtend(RequireHLetter) }
wd::WC_MidLetter | wd::WC_MidNumLet | wd::WC_Single_Quote => {
saveidx = previdx;
FormatExtend(RequireLetter) }
_ => {
take_curr = false;
break;
}
},
// Before a digit: digits, letters and ExtendNumLet continue the word; a
// mid-number separator is accepted tentatively.
Numeric => match cat {
wd::WC_Numeric => Numeric, wd::WC_ALetter => Letter, wd::WC_Hebrew_Letter => HLetter, wd::WC_ExtendNumLet => ExtendNumLet, wd::WC_MidNum | wd::WC_MidNumLet | wd::WC_Single_Quote => {
saveidx = previdx;
FormatExtend(RequireNumeric) }
_ => {
take_curr = false;
break;
}
},
// Katakana continues only with Katakana or ExtendNumLet.
Katakana => match cat {
wd::WC_Katakana => Katakana, wd::WC_ExtendNumLet => ExtendNumLet, _ => {
take_curr = false;
break;
}
},
// ExtendNumLet joins letters, digits, Katakana and more ExtendNumLet.
ExtendNumLet => match cat {
wd::WC_ExtendNumLet => ExtendNumLet, wd::WC_ALetter => Letter, wd::WC_Hebrew_Letter => HLetter, wd::WC_Numeric => Numeric, wd::WC_Katakana => Katakana, _ => {
take_curr = false;
break;
}
},
// Regional indicators pair up from the left, so whether the current one
// joins the one already accepted depends on how many precede it. When that
// is still `Unknown`, count the run of regional indicators in
// `self.string[..previdx]` (ignoring ZWJ/Extend/Format): an even count means
// the accepted indicator stands without the current one (`Full`), an odd
// count means the current one completes the pair.
Regional(mut regional_state) => match cat {
wd::WC_Regional_Indicator => {
if regional_state == RegionalState::Unknown {
let count = self.string[..previdx]
.chars()
.rev()
.map(|c| wd::word_category(c).2)
.filter(|&c| {
!(c == wd::WC_ZWJ || c == wd::WC_Extend || c == wd::WC_Format)
})
.take_while(|&c| c == wd::WC_Regional_Indicator)
.count();
regional_state = if count % 2 == 0 {
RegionalState::Full
} else {
RegionalState::Half
};
}
if regional_state == RegionalState::Full {
take_curr = false;
break;
} else {
Regional(RegionalState::Full)
}
}
_ => {
take_curr = false;
break;
}
},
// An emoji in the `Emoji` state keeps the sequence going; anything else
// ends it.
Emoji => {
if is_emoji(ch) {
Zwj
} else {
take_curr = false;
break;
}
}
// Resolve a tentative character: if the required category precedes it the
// word continues, otherwise break and let the rollback below restore the
// saved boundary.
FormatExtend(t) => match t {
RequireNumeric if cat == wd::WC_Numeric => Numeric, RequireLetter if cat == wd::WC_ALetter => Letter, RequireLetter if cat == wd::WC_Hebrew_Letter => HLetter, AcceptQLetter if cat == wd::WC_Hebrew_Letter => HLetter, RequireHLetter if cat == wd::WC_Hebrew_Letter => HLetter, _ => break, },
}
}
// Still in a tentative state (mismatch or start of input reached): fall
// back to the saved boundary and do not cache a category.
if let FormatExtend(t) = state {
if t == RequireLetter
|| t == RequireHLetter
|| t == RequireNumeric
|| t == AcceptNone
|| t == AcceptQLetter
{
previdx = saveidx;
take_cat = false;
take_curr = false;
}
}
// When the current character is left behind, the segment starts at
// `previdx` and the character's category is cached if `take_cat` allows.
self.catb = if take_curr {
None
} else {
idx = previdx;
if take_cat {
Some(cat)
} else {
None
}
};
let retstr = &self.string[idx..];
self.string = &self.string[..idx];
Some(retstr)
}
}
impl<'a> UWordBounds<'a> {
    /// Returns the part of the input that has not been yielded yet.
    #[inline]
    pub fn as_str(&self) -> &'a str {
        let remaining: &'a str = self.string;
        remaining
    }

    /// Word category of the character that follows the one starting at byte
    /// offset `idx`, or `None` when that character is the last one.
    ///
    /// Panics when there is no character at `idx`.
    #[inline]
    fn get_next_cat(&self, idx: usize) -> Option<WordCat> {
        use crate::tables::word as wd;

        let mut upcoming = self.string[idx..].chars();
        // Step over the character at `idx` itself; it must exist.
        let _current = upcoming.next().unwrap();
        upcoming.next().map(|following| wd::word_category(following).2)
    }

    /// Word category of the character that ends right before byte offset
    /// `idx`, or `None` when `idx` is the start of the remaining input.
    #[inline]
    fn get_prev_cat(&self, idx: usize) -> Option<WordCat> {
        use crate::tables::word as wd;

        self.string[..idx]
            .chars()
            .next_back()
            .map(|preceding| wd::word_category(preceding).2)
    }
}
/// Word-boundary iterator specialised for input that consists of ASCII only.
///
/// Yields `(byte offset, segment)` pairs from either end. Segments are:
/// a run of spaces, a run of "core" bytes (letters, digits, `_`) optionally
/// joined by infix punctuation, a `\r\n` pair, or any other single byte.
#[derive(Debug)]
struct AsciiWordBoundIter<'a> {
    /// Part of the input not yet yielded from either end.
    rest: &'a str,
    /// Byte offset of `rest` within the original input.
    offset: usize,
}

impl<'a> AsciiWordBoundIter<'a> {
    /// Creates an iterator over `s`, which the caller has checked to be ASCII.
    pub fn new(s: &'a str) -> Self {
        AsciiWordBoundIter { rest: s, offset: 0 }
    }

    /// Bytes that can make up a word on their own: letters, digits and `_`.
    #[inline]
    fn is_core(b: u8) -> bool {
        b == b'_' || b.is_ascii_alphanumeric()
    }

    /// Whether `b` glues `prev` and `next` together inside a word.
    ///
    /// `.`, `,`, `;` and `'` join two digits; `'`, `.` and `:` join two
    /// letters.
    #[inline]
    fn is_infix(b: u8, prev: u8, next: u8) -> bool {
        match b {
            b'.' | b',' | b';' | b'\'' if prev.is_ascii_digit() && next.is_ascii_digit() => true,
            b'\'' | b'.' | b':' if prev.is_ascii_alphabetic() && next.is_ascii_alphabetic() => true,
            _ => false,
        }
    }

    /// Detaches the first `len` bytes of `rest` and returns them with their
    /// offset in the original input.
    #[inline]
    fn take_front(&mut self, len: usize) -> (usize, &'a str) {
        let rest = self.rest;
        let (word, tail) = rest.split_at(len);
        let pos = self.offset;
        self.rest = tail;
        self.offset += len;
        (pos, word)
    }

    /// Detaches everything from byte `start` of `rest` to its end and returns
    /// it with its offset in the original input.
    #[inline]
    fn take_back(&mut self, start: usize) -> (usize, &'a str) {
        let rest = self.rest;
        let (head, word) = rest.split_at(start);
        self.rest = head;
        (self.offset + start, word)
    }
}

impl<'a> Iterator for AsciiWordBoundIter<'a> {
    type Item = (usize, &'a str);

    /// Yields the next segment from the front of the remaining input.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let bytes = self.rest.as_bytes();
        let first = *bytes.first()?;
        let len = bytes.len();

        let taken = if first == b' ' {
            // Adjacent spaces form one segment.
            bytes.iter().take_while(|&&b| b == b' ').count()
        } else if Self::is_core(first) {
            // Extend over core bytes and over infix bytes that sit between two
            // suitable neighbours; an infix byte at the very end never joins.
            let mut i = 1;
            while i < len {
                let b = bytes[i];
                let joins = Self::is_core(b)
                    || (i + 1 < len && Self::is_infix(b, bytes[i - 1], bytes[i + 1]));
                if !joins {
                    break;
                }
                i += 1;
            }
            i
        } else if first == b'\r' && bytes.get(1) == Some(&b'\n') {
            // CR LF is never split.
            2
        } else {
            // Everything else is a segment of its own.
            1
        };

        Some(self.take_front(taken))
    }

    /// At least one segment remains while input is left, and every segment is
    /// at least one byte long, so the byte count bounds the segment count.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.rest.len();
        (remaining.min(1), Some(remaining))
    }
}

impl<'a> DoubleEndedIterator for AsciiWordBoundIter<'a> {
    /// Yields the next segment from the back of the remaining input.
    fn next_back(&mut self) -> Option<(usize, &'a str)> {
        let bytes = self.rest.as_bytes();
        let last = *bytes.last()?;
        let len = bytes.len();

        let start = if last == b' ' {
            // Adjacent spaces form one segment.
            len - bytes.iter().rev().take_while(|&&b| b == b' ').count()
        } else if Self::is_core(last) {
            // Walk left over core bytes and over infix bytes that sit between
            // two suitable neighbours.
            let mut start = len - 1;
            while start > 0 {
                let candidate = bytes[start - 1];
                // With nothing to the left, reuse the candidate itself; an
                // infix byte is neither digit nor letter, so it cannot join.
                let before = if start >= 2 { bytes[start - 2] } else { candidate };
                let after = bytes[start];
                if Self::is_core(candidate) || Self::is_infix(candidate, before, after) {
                    start -= 1;
                } else {
                    break;
                }
            }
            start
        } else if last == b'\n' && len >= 2 && bytes[len - 2] == b'\r' {
            // CR LF is never split.
            len - 2
        } else {
            // Everything else is a segment of its own.
            len - 1
        };

        Some(self.take_back(start))
    }
}
/// Filter predicate for `(offset, segment)` pairs on the ASCII path: keeps a
/// pair when its segment contains at least one ASCII letter or digit.
#[inline]
fn ascii_word_ok(t: &(usize, &str)) -> bool {
    let (_, segment) = t;
    has_ascii_alphanumeric(segment)
}
/// Filter predicate for `(offset, segment)` pairs on the general path: keeps a
/// pair when its segment contains at least one alphanumeric character.
#[inline]
fn unicode_word_ok(t: &(usize, &str)) -> bool {
    let (_, segment) = t;
    has_alphanumeric(segment)
}
// Concrete iterator types stored in `WordsIter` / `IndicesIter`. The closures
// are spelled as `fn` pointers so the types can be named.

// ASCII words: segment iterator with the offset mapped away, then filtered.
type AsciiWordsIter<'a> = Filter<
core::iter::Map<AsciiWordBoundIter<'a>, fn((usize, &'a str)) -> &'a str>,
fn(&&'a str) -> bool,
>;
// General words: filtered Unicode segment iterator.
type UnicodeWordsIter<'a> = Filter<UWordBounds<'a>, fn(&&'a str) -> bool>;
// ASCII words with offsets: filtered ASCII segment iterator.
type AsciiIndicesIter<'a> = Filter<AsciiWordBoundIter<'a>, fn(&(usize, &'a str)) -> bool>;
// General words with offsets: filtered Unicode segment-with-offset iterator.
type UnicodeIndicesIter<'a> = Filter<UWordBoundIndices<'a>, fn(&(usize, &'a str)) -> bool>;
/// The iterator actually driving a `UnicodeWords`; the variant is chosen by
/// `new_unicode_words` depending on whether the input is ASCII.
#[derive(Debug)]
enum WordsIter<'a> {
// Input was entirely ASCII.
Ascii(AsciiWordsIter<'a>),
// Input contained at least one non-ASCII character.
Unicode(UnicodeWordsIter<'a>),
}
/// The iterator actually driving a `UnicodeWordIndices`; the variant is chosen
/// by `new_unicode_word_indices` depending on whether the input is ASCII.
#[derive(Debug)]
enum IndicesIter<'a> {
// Input was entirely ASCII.
Ascii(AsciiIndicesIter<'a>),
// Input contained at least one non-ASCII character.
Unicode(UnicodeIndicesIter<'a>),
}
/// Creates the word iterator for `s`, using the ASCII implementation when
/// `s` is entirely ASCII and the general Unicode implementation otherwise.
#[inline]
pub fn new_unicode_words(s: &str) -> UnicodeWords<'_> {
    if s.is_ascii() {
        return UnicodeWords {
            inner: WordsIter::Ascii(new_unicode_words_ascii(s)),
        };
    }
    UnicodeWords {
        inner: WordsIter::Unicode(new_unicode_words_general(s)),
    }
}
/// Creates the `(offset, word)` iterator for `s`, using the ASCII
/// implementation when `s` is entirely ASCII and the general Unicode
/// implementation otherwise.
#[inline]
pub fn new_unicode_word_indices(s: &str) -> UnicodeWordIndices<'_> {
    if s.is_ascii() {
        let segments = new_ascii_word_bound_indices(s);
        return UnicodeWordIndices {
            inner: IndicesIter::Ascii(segments.filter(ascii_word_ok)),
        };
    }
    let segments = new_word_bound_indices(s);
    UnicodeWordIndices {
        inner: IndicesIter::Unicode(segments.filter(unicode_word_ok)),
    }
}
/// Creates a segment iterator over all of `s` with both category caches
/// empty.
#[inline]
pub fn new_word_bounds(s: &str) -> UWordBounds<'_> {
    let (cat, catb) = (None, None);
    UWordBounds {
        string: s,
        cat,
        catb,
    }
}
/// Creates a segment iterator over `s` that also reports byte offsets.
///
/// The address of the start of `s` is recorded so that each segment's offset
/// can later be computed from the segment's own address.
#[inline]
pub fn new_word_bound_indices(s: &str) -> UWordBoundIndices<'_> {
    let start_offset = s.as_ptr() as usize;
    let iter = new_word_bounds(s);
    UWordBoundIndices { start_offset, iter }
}
#[inline]
fn new_ascii_word_bound_indices(s: &str) -> AsciiWordBoundIter<'_> {
AsciiWordBoundIter::new(s)
}
/// Returns `true` when `s` contains at least one character the crate's
/// `is_alphanumeric` table lookup accepts.
#[inline]
fn has_alphanumeric(s: &&str) -> bool {
    use crate::tables::util::is_alphanumeric;

    for c in s.chars() {
        if is_alphanumeric(c) {
            return true;
        }
    }
    false
}
/// Returns `true` when `s` contains at least one ASCII letter or digit.
#[inline]
fn has_ascii_alphanumeric(s: &&str) -> bool {
    // Bytes of multi-byte characters are all >= 0x80 and therefore never
    // count as ASCII alphanumerics, so scanning bytes gives the same answer
    // as scanning characters.
    s.bytes().any(|b| b.is_ascii_alphanumeric())
}
/// Drops the offset from an `(offset, segment)` pair and returns the segment.
#[inline(always)]
fn strip_pos(pair: (usize, &str)) -> &str {
    pair.1
}
/// Builds the ASCII word iterator: ASCII segments with their offsets removed,
/// keeping only segments that contain an ASCII letter or digit.
#[inline]
fn new_unicode_words_ascii<'a>(s: &'a str) -> AsciiWordsIter<'a> {
    // Named as `fn` pointers so the result matches the `AsciiWordsIter` alias.
    let project: fn((usize, &'a str)) -> &'a str = strip_pos;
    let keep: fn(&&'a str) -> bool = has_ascii_alphanumeric;
    new_ascii_word_bound_indices(s).map(project).filter(keep)
}
/// Builds the general word iterator: Unicode segments, keeping only those
/// that contain an alphanumeric character.
#[inline]
fn new_unicode_words_general<'a>(s: &'a str) -> UnicodeWordsIter<'a> {
    // Named as a `fn` pointer so the result matches the `UnicodeWordsIter` alias.
    let keep: fn(&&'a str) -> bool = has_alphanumeric;
    new_word_bounds(s).filter(keep)
}
#[cfg(test)]
mod tests {
    use crate::word::{
        new_ascii_word_bound_indices, new_unicode_words_ascii, new_word_bound_indices,
    };
    use std::string::String;
    use std::vec;
    use std::vec::Vec;
    use proptest::prelude::*;

    /// U+070F must be categorised as `ALetter`.
    #[test]
    fn test_syriac_abbr_mark() {
        use crate::tables::word as wd;
        let cat = wd::word_category('\u{70f}').2;
        assert_eq!(cat, wd::WC_ALetter);
    }

    /// U+06DD must be categorised as `Numeric`.
    #[test]
    fn test_end_of_ayah_cat() {
        use crate::tables::word as wd;
        let cat = wd::word_category('\u{6dd}').2;
        assert_eq!(cat, wd::WC_Numeric);
    }

    /// The ASCII segment iterator reports every segment with its offset.
    #[test]
    fn test_ascii_word_bound_indices_various_cases() {
        let s = "Hello, world!";
        let words: Vec<(usize, &str)> = new_ascii_word_bound_indices(s).collect();
        let expected = vec![
            (0, "Hello"),
            (5, ","),
            (6, " "),
            (7, "world"),
            (12, "!"),
        ];
        assert_eq!(words, expected);
    }

    /// The ASCII word iterator keeps infix punctuation inside words and drops
    /// segments without letters or digits.
    #[test]
    fn test_ascii_word_indices_various_cases() {
        let s = "Hello, world! can't e.g. var1 123,456 foo_bar example.com 127.0.0.1:9090";
        let words: Vec<&str> = new_unicode_words_ascii(s).collect();
        let expected = vec![
            "Hello",
            "world",
            "can't",
            "e.g",
            "var1",
            "123,456",
            "foo_bar",
            "example.com",
            "127.0.0.1",
            "9090",
        ];
        assert_eq!(words, expected);
    }

    /// Any character in the ASCII range, control characters included.
    fn ascii_char() -> impl Strategy<Value = char> {
        (0u8..=127).prop_map(|b| b as char)
    }

    /// ASCII strings of fewer than 100 characters.
    fn ascii_string() -> impl Strategy<Value = String> {
        proptest::collection::vec(ascii_char(), 0..100)
            .prop_map(|v| v.into_iter().collect::<String>())
    }

    proptest! {
        #![proptest_config(ProptestConfig::with_cases(10000))]

        // The ASCII fast path must split exactly like the general iterator.
        #[test]
        fn proptest_ascii_matches_unicode_word_indices(s in ascii_string()) {
            let fast: Vec<(usize, &str)> = new_ascii_word_bound_indices(&s).collect();
            let uni: Vec<(usize, &str)> = new_word_bound_indices(&s).collect();
            prop_assert_eq!(fast, uni);
        }

        // Same comparison when iterating from the back.
        #[test]
        fn proptest_ascii_matches_unicode_word_indices_rev(s in ascii_string()) {
            let fast_rev: Vec<(usize, &str)> = new_ascii_word_bound_indices(&s).rev().collect();
            let uni_rev: Vec<(usize, &str)> = new_word_bound_indices(&s).rev().collect();
            prop_assert_eq!(fast_rev, uni_rev);
        }
    }
}