#![cfg(feature = "character")]
use core::slice::Iter;
use std::collections::HashMap;
use std::ops::Deref;
#[allow(clippy::len_without_is_empty)]
pub trait BMCharacterSearchable {
fn len(&self) -> usize;
fn value_at(&self, index: usize) -> char;
fn iter(&self) -> Iter<char>;
}
impl BMCharacterSearchable for dyn Deref<Target = [char]> {
#[inline]
fn len(&self) -> usize {
<[char]>::len(self)
}
#[inline]
fn value_at(&self, index: usize) -> char {
self[index]
}
#[inline]
fn iter(&self) -> Iter<char> {
<[char]>::iter(self)
}
}
impl BMCharacterSearchable for Vec<char> {
#[inline]
fn len(&self) -> usize {
Vec::len(self)
}
#[inline]
fn value_at(&self, index: usize) -> char {
self[index]
}
#[inline]
fn iter(&self) -> Iter<char> {
self.as_slice().iter()
}
}
impl<T: BMCharacterSearchable> BMCharacterSearchable for &T {
#[inline]
fn len(&self) -> usize {
<dyn BMCharacterSearchable>::len(*self)
}
#[inline]
fn value_at(&self, index: usize) -> char {
<dyn BMCharacterSearchable>::value_at(*self, index)
}
#[inline]
fn iter(&self) -> Iter<char> {
<dyn BMCharacterSearchable>::iter(*self)
}
}
#[derive(Debug)]
pub struct BMCharacterBadCharShiftMap {
t: HashMap<char, usize>,
}
impl Deref for BMCharacterBadCharShiftMap {
type Target = HashMap<char, usize>;
#[inline]
fn deref(&self) -> &HashMap<char, usize> {
&self.t
}
}
#[derive(Debug)]
pub struct BMCharacterBadCharShiftMapRev {
t: HashMap<char, usize>,
}
impl Deref for BMCharacterBadCharShiftMapRev {
type Target = HashMap<char, usize>;
#[inline]
fn deref(&self) -> &HashMap<char, usize> {
&self.t
}
}
impl BMCharacterBadCharShiftMap {
pub fn create_bad_char_shift_map<T: BMCharacterSearchable>(
pattern: T,
) -> Option<BMCharacterBadCharShiftMap> {
let pattern_len = pattern.len();
if pattern_len == 0 {
return None;
}
let pattern_len_dec = pattern_len - 1;
let mut bad_char_shift_map: HashMap<char, usize> = HashMap::with_capacity(pattern_len_dec);
for (i, c) in pattern.iter().copied().take(pattern_len_dec).enumerate() {
bad_char_shift_map.insert(c, pattern_len_dec - i);
}
Some(BMCharacterBadCharShiftMap {
t: bad_char_shift_map,
})
}
}
impl BMCharacterBadCharShiftMapRev {
pub fn create_bad_char_shift_map<T: BMCharacterSearchable>(
pattern: T,
) -> Option<BMCharacterBadCharShiftMapRev> {
let pattern_len = pattern.len();
if pattern_len == 0 {
return None;
}
let pattern_len_dec = pattern_len - 1;
let mut bad_char_shift_map: HashMap<char, usize> = HashMap::with_capacity(pattern_len_dec);
for (i, c) in pattern.iter().copied().enumerate().rev().take(pattern_len_dec) {
bad_char_shift_map.insert(c, i);
}
Some(BMCharacterBadCharShiftMapRev {
t: bad_char_shift_map,
})
}
}
#[derive(Debug)]
pub struct BMCharacter {
bad_char_shift_map: BMCharacterBadCharShiftMap,
bad_char_shift_map_rev: BMCharacterBadCharShiftMapRev,
pattern: Vec<char>,
}
impl BMCharacter {
pub fn from<T: BMCharacterSearchable>(pattern: T) -> Option<BMCharacter> {
let bad_char_shift_map = BMCharacterBadCharShiftMap::create_bad_char_shift_map(&pattern)?;
let bad_char_shift_map_rev =
BMCharacterBadCharShiftMapRev::create_bad_char_shift_map(&pattern)?;
Some(BMCharacter {
bad_char_shift_map,
bad_char_shift_map_rev,
pattern: pattern.iter().copied().collect(),
})
}
}
impl BMCharacter {
pub fn find_full_all_in<T: BMCharacterSearchable>(&self, text: T) -> Vec<usize> {
find_full(text, &self.pattern, &self.bad_char_shift_map, 0)
}
pub fn find_full_in<T: BMCharacterSearchable>(&self, text: T, limit: usize) -> Vec<usize> {
find_full(text, &self.pattern, &self.bad_char_shift_map, limit)
}
}
impl BMCharacter {
pub fn rfind_full_all_in<T: BMCharacterSearchable>(&self, text: T) -> Vec<usize> {
rfind_full(text, &self.pattern, &self.bad_char_shift_map_rev, 0)
}
pub fn rfind_full_in<T: BMCharacterSearchable>(&self, text: T, limit: usize) -> Vec<usize> {
rfind_full(text, &self.pattern, &self.bad_char_shift_map_rev, limit)
}
}
pub fn find_full<TT: BMCharacterSearchable, TP: BMCharacterSearchable>(
text: TT,
pattern: TP,
bad_char_shift_map: &BMCharacterBadCharShiftMap,
limit: usize,
) -> Vec<usize> {
let text_len = text.len();
let pattern_len = pattern.len();
if text_len == 0 || pattern_len == 0 || text_len < pattern_len {
return vec![];
}
let pattern_len_dec = pattern_len - 1;
let pattern_len_inc = pattern_len + 1;
let last_pattern_char = pattern.value_at(pattern_len_dec);
let mut shift = 0;
let end_index = text_len - pattern_len;
let mut result = vec![];
'outer: loop {
for (i, pc) in pattern.iter().copied().enumerate().rev() {
if text.value_at(shift + i) != pc {
let p = shift + pattern_len;
if p == text_len {
break 'outer;
}
shift += bad_char_shift_map
.get(&text.value_at(shift + pattern_len_dec))
.copied()
.unwrap_or(pattern_len)
.max({
let c = text.value_at(p);
if c == last_pattern_char {
1
} else {
bad_char_shift_map.get(&c).map(|&c| c + 1).unwrap_or(pattern_len_inc)
}
});
if shift > end_index {
break 'outer;
}
continue 'outer;
}
}
result.push(shift);
if shift == end_index {
break;
}
if result.len() == limit {
break;
}
shift += bad_char_shift_map
.get(&text.value_at(shift + pattern_len_dec))
.copied()
.unwrap_or(pattern_len)
.max({
let c = text.value_at(shift + pattern_len);
if c == last_pattern_char {
1
} else {
bad_char_shift_map.get(&c).map(|&c| c + 1).unwrap_or(pattern_len_inc)
}
});
if shift > end_index {
break;
}
}
result
}
pub fn rfind_full<TT: BMCharacterSearchable, TP: BMCharacterSearchable>(
text: TT,
pattern: TP,
bad_char_shift_map: &BMCharacterBadCharShiftMapRev,
limit: usize,
) -> Vec<usize> {
let text_len = text.len();
let pattern_len = pattern.len();
if text_len == 0 || pattern_len == 0 || text_len < pattern_len {
return vec![];
}
let pattern_len_dec = pattern_len - 1;
let pattern_len_inc = pattern_len + 1;
let first_pattern_char = pattern.value_at(0);
let mut shift = text_len - 1;
let start_index = pattern_len_dec;
let mut result = vec![];
'outer: loop {
for (i, pc) in pattern.iter().copied().enumerate() {
if text.value_at(shift - pattern_len_dec + i) != pc {
if shift < pattern_len {
break 'outer;
}
let s = bad_char_shift_map
.get(&text.value_at(shift - pattern_len_dec))
.copied()
.unwrap_or(pattern_len)
.max({
let c = text.value_at(shift - pattern_len);
if c == first_pattern_char {
1
} else {
bad_char_shift_map.get(&c).map(|&c| c + 1).unwrap_or(pattern_len_inc)
}
});
if shift < s {
break 'outer;
}
shift -= s;
if shift < start_index {
break 'outer;
}
continue 'outer;
}
}
result.push(shift - pattern_len_dec);
if shift == start_index {
break;
}
if result.len() == limit {
break;
}
let s = bad_char_shift_map
.get(&text.value_at(shift - pattern_len_dec))
.copied()
.unwrap_or(pattern_len)
.max({
let c = text.value_at(shift - pattern_len);
if c == first_pattern_char {
1
} else {
bad_char_shift_map.get(&c).map(|&c| c + 1).unwrap_or(pattern_len_inc)
}
});
if shift < s {
break;
}
shift -= s;
if shift < start_index {
break;
}
}
result
}
impl BMCharacter {
pub fn find_all_in<T: BMCharacterSearchable>(&self, text: T) -> Vec<usize> {
find(text, &self.pattern, &self.bad_char_shift_map, 0)
}
pub fn find_first_in<T: BMCharacterSearchable>(&self, text: T) -> Option<usize> {
find(text, &self.pattern, &self.bad_char_shift_map, 1).first().copied()
}
pub fn find_in<T: BMCharacterSearchable>(&self, text: T, limit: usize) -> Vec<usize> {
find(text, &self.pattern, &self.bad_char_shift_map, limit)
}
}
impl BMCharacter {
pub fn rfind_all_in<T: BMCharacterSearchable>(&self, text: T) -> Vec<usize> {
rfind(text, &self.pattern, &self.bad_char_shift_map_rev, 0)
}
pub fn rfind_first_in<T: BMCharacterSearchable>(&self, text: T) -> Option<usize> {
rfind(text, &self.pattern, &self.bad_char_shift_map_rev, 1).first().copied()
}
pub fn rfind_in<T: BMCharacterSearchable>(&self, text: T, limit: usize) -> Vec<usize> {
rfind(text, &self.pattern, &self.bad_char_shift_map_rev, limit)
}
}
pub fn find<TT: BMCharacterSearchable, TP: BMCharacterSearchable>(
text: TT,
pattern: TP,
bad_char_shift_map: &BMCharacterBadCharShiftMap,
limit: usize,
) -> Vec<usize> {
let text_len = text.len();
let pattern_len = pattern.len();
if text_len == 0 || pattern_len == 0 || text_len < pattern_len {
return vec![];
}
let pattern_len_dec = pattern_len - 1;
let pattern_len_inc = pattern_len + 1;
let last_pattern_char = pattern.value_at(pattern_len_dec);
let mut shift = 0;
let end_index = text_len - pattern_len;
let mut result = vec![];
'outer: loop {
for (i, pc) in pattern.iter().copied().enumerate().rev() {
if text.value_at(shift + i) != pc {
let p = shift + pattern_len;
if p == text_len {
break 'outer;
}
shift += bad_char_shift_map
.get(&text.value_at(shift + pattern_len_dec))
.copied()
.unwrap_or(pattern_len)
.max({
let c = text.value_at(p);
if c == last_pattern_char {
1
} else {
bad_char_shift_map.get(&c).map(|&c| c + 1).unwrap_or(pattern_len_inc)
}
});
if shift > end_index {
break 'outer;
}
continue 'outer;
}
}
result.push(shift);
if shift == end_index {
break;
}
if result.len() == limit {
break;
}
shift += pattern_len;
if shift > end_index {
break;
}
}
result
}
pub fn rfind<TT: BMCharacterSearchable, TP: BMCharacterSearchable>(
text: TT,
pattern: TP,
bad_char_shift_map: &BMCharacterBadCharShiftMapRev,
limit: usize,
) -> Vec<usize> {
let text_len = text.len();
let pattern_len = pattern.len();
if text_len == 0 || pattern_len == 0 || text_len < pattern_len {
return vec![];
}
let pattern_len_dec = pattern_len - 1;
let pattern_len_inc = pattern_len + 1;
let first_pattern_char = pattern.value_at(0);
let mut shift = text_len - 1;
let start_index = pattern_len_dec;
let mut result = vec![];
'outer: loop {
for (i, pc) in pattern.iter().copied().enumerate() {
if text.value_at(shift - pattern_len_dec + i) != pc {
if shift < pattern_len {
break 'outer;
}
let s = bad_char_shift_map
.get(&text.value_at(shift - pattern_len_dec))
.copied()
.unwrap_or(pattern_len)
.max({
let c = text.value_at(shift - pattern_len);
if c == first_pattern_char {
1
} else {
bad_char_shift_map.get(&c).map(|&c| c + 1).unwrap_or(pattern_len_inc)
}
});
if shift < s {
break 'outer;
}
shift -= s;
if shift < start_index {
break 'outer;
}
continue 'outer;
}
}
result.push(shift - pattern_len_dec);
if shift == start_index {
break;
}
if result.len() == limit {
break;
}
shift -= pattern_len;
if shift < start_index {
break;
}
}
result
}