#![cfg_attr(not(test), no_std)]
pub mod sz {
use core::ffi::c_void;
// Foreign declarations of the C routines wrapped by this module. Every routine
// takes raw pointer/length pairs; the safe wrappers below derive those pairs
// from Rust byte slices.
extern "C" {
/// Substring search used by `find`. The wrapper treats a null return as
/// "no match" and otherwise measures the returned pointer against `haystack`.
fn sz_find(
haystack: *const c_void,
haystack_length: usize,
needle: *const c_void,
needle_length: usize,
) -> *const c_void;
/// Substring search used by `rfind`; same calling convention as `sz_find`.
/// NOTE(review): presumably searches from the end of the haystack — confirm
/// against the C header.
fn sz_rfind(
haystack: *const c_void,
haystack_length: usize,
needle: *const c_void,
needle_length: usize,
) -> *const c_void;
/// Byte-set search used by `find_char_from`; `needle` is passed the set of
/// candidate bytes. Null return is treated as "no match" by the wrapper.
fn sz_find_char_from(
haystack: *const c_void,
haystack_length: usize,
needle: *const c_void,
needle_length: usize,
) -> *const c_void;
/// Byte-set search used by `rfind_char_from`; same calling convention as
/// `sz_find_char_from`.
fn sz_rfind_char_from(
haystack: *const c_void,
haystack_length: usize,
needle: *const c_void,
needle_length: usize,
) -> *const c_void;
/// Byte-set search used by `find_char_not_from`; `needle` is passed the set
/// of bytes the wrapper's tests expect to be skipped.
fn sz_find_char_not_from(
haystack: *const c_void,
haystack_length: usize,
needle: *const c_void,
needle_length: usize,
) -> *const c_void;
/// Byte-set search used by `rfind_char_not_from`; same calling convention as
/// `sz_find_char_not_from`.
fn sz_rfind_char_not_from(
haystack: *const c_void,
haystack_length: usize,
needle: *const c_void,
needle_length: usize,
) -> *const c_void;
/// Edit distance over two byte buffers. `bound` is forwarded from
/// `edit_distance_bounded` (the unbounded wrapper passes 0); every caller in
/// this file passes a null `allocator`.
fn sz_edit_distance(
haystack1: *const c_void,
haystack1_length: usize,
haystack2: *const c_void,
haystack2_length: usize,
bound: usize,
allocator: *const c_void,
) -> usize;
/// UTF-8 flavoured counterpart of `sz_edit_distance`, called with the same
/// arguments by `edit_distance_utf8_bounded`.
fn sz_edit_distance_utf8(
haystack1: *const c_void,
haystack1_length: usize,
haystack2: *const c_void,
haystack2_length: usize,
bound: usize,
allocator: *const c_void,
) -> usize;
/// Hamming distance over two byte buffers; `bound` is forwarded from
/// `hamming_distance_bounded` (the unbounded wrapper passes 0).
fn sz_hamming_distance(
haystack1: *const c_void,
haystack1_length: usize,
haystack2: *const c_void,
haystack2_length: usize,
bound: usize,
) -> usize;
/// UTF-8 flavoured counterpart of `sz_hamming_distance`.
fn sz_hamming_distance_utf8(
haystack1: *const c_void,
haystack1_length: usize,
haystack2: *const c_void,
haystack2_length: usize,
bound: usize,
) -> usize;
/// Alignment score of two byte buffers. The wrapper passes the address of a
/// `[[i8; 256]; 256]` table as `matrix`, the caller's `gap` value, and a null
/// `allocator`.
fn sz_alignment_score(
haystack1: *const c_void,
haystack1_length: usize,
haystack2: *const c_void,
haystack2_length: usize,
matrix: *const c_void,
gap: i8,
allocator: *const c_void,
) -> isize;
/// Called by `randomize` with the alphabet bytes and the mutable output
/// buffer; the wrapper passes null for both `generate` and `generator`.
/// NOTE(review): presumably fills `text` with bytes drawn from `alphabet` —
/// confirm against the C header.
fn sz_generate(
alphabet: *const c_void,
alphabet_size: usize,
text: *mut c_void,
length: usize,
generate: *const c_void,
generator: *mut c_void,
);
}
pub fn find<H, N>(haystack: H, needle: N) -> Option<usize>
where
H: AsRef<[u8]>,
N: AsRef<[u8]>,
{
let haystack_ref = haystack.as_ref();
let needle_ref = needle.as_ref();
let haystack_pointer = haystack_ref.as_ptr() as _;
let haystack_length = haystack_ref.len();
let needle_pointer = needle_ref.as_ptr() as _;
let needle_length = needle_ref.len();
let result = unsafe {
sz_find(
haystack_pointer,
haystack_length,
needle_pointer,
needle_length,
)
};
if result.is_null() {
None
} else {
Some(unsafe { result.offset_from(haystack_pointer) } as usize)
}
}
pub fn rfind<H, N>(haystack: H, needle: N) -> Option<usize>
where
H: AsRef<[u8]>,
N: AsRef<[u8]>,
{
let haystack_ref = haystack.as_ref();
let needle_ref = needle.as_ref();
let haystack_pointer = haystack_ref.as_ptr() as _;
let haystack_length = haystack_ref.len();
let needle_pointer = needle_ref.as_ptr() as _;
let needle_length = needle_ref.len();
let result = unsafe {
sz_rfind(
haystack_pointer,
haystack_length,
needle_pointer,
needle_length,
)
};
if result.is_null() {
None
} else {
Some(unsafe { result.offset_from(haystack_pointer) } as usize)
}
}
pub fn find_char_from<H, N>(haystack: H, needles: N) -> Option<usize>
where
H: AsRef<[u8]>,
N: AsRef<[u8]>,
{
let haystack_ref = haystack.as_ref();
let needles_ref = needles.as_ref();
let haystack_pointer = haystack_ref.as_ptr() as _;
let haystack_length = haystack_ref.len();
let needles_pointer = needles_ref.as_ptr() as _;
let needles_length = needles_ref.len();
let result = unsafe {
sz_find_char_from(
haystack_pointer,
haystack_length,
needles_pointer,
needles_length,
)
};
if result.is_null() {
None
} else {
Some(unsafe { result.offset_from(haystack_pointer) } as usize)
}
}
pub fn rfind_char_from<H, N>(haystack: H, needles: N) -> Option<usize>
where
H: AsRef<[u8]>,
N: AsRef<[u8]>,
{
let haystack_ref = haystack.as_ref();
let needles_ref = needles.as_ref();
let haystack_pointer = haystack_ref.as_ptr() as _;
let haystack_length = haystack_ref.len();
let needles_pointer = needles_ref.as_ptr() as _;
let needles_length = needles_ref.len();
let result = unsafe {
sz_rfind_char_from(
haystack_pointer,
haystack_length,
needles_pointer,
needles_length,
)
};
if result.is_null() {
None
} else {
Some(unsafe { result.offset_from(haystack_pointer) } as usize)
}
}
pub fn find_char_not_from<H, N>(haystack: H, needles: N) -> Option<usize>
where
H: AsRef<[u8]>,
N: AsRef<[u8]>,
{
let haystack_ref = haystack.as_ref();
let needles_ref = needles.as_ref();
let haystack_pointer = haystack_ref.as_ptr() as _;
let haystack_length = haystack_ref.len();
let needles_pointer = needles_ref.as_ptr() as _;
let needles_length = needles_ref.len();
let result = unsafe {
sz_find_char_not_from(
haystack_pointer,
haystack_length,
needles_pointer,
needles_length,
)
};
if result.is_null() {
None
} else {
Some(unsafe { result.offset_from(haystack_pointer) } as usize)
}
}
pub fn rfind_char_not_from<H, N>(haystack: H, needles: N) -> Option<usize>
where
H: AsRef<[u8]>,
N: AsRef<[u8]>,
{
let haystack_ref = haystack.as_ref();
let needles_ref = needles.as_ref();
let haystack_pointer = haystack_ref.as_ptr() as _;
let haystack_length = haystack_ref.len();
let needles_pointer = needles_ref.as_ptr() as _;
let needles_length = needles_ref.len();
let result = unsafe {
sz_rfind_char_not_from(
haystack_pointer,
haystack_length,
needles_pointer,
needles_length,
)
};
if result.is_null() {
None
} else {
Some(unsafe { result.offset_from(haystack_pointer) } as usize)
}
}
pub fn edit_distance_bounded<F, S>(first: F, second: S, bound: usize) -> usize
where
F: AsRef<[u8]>,
S: AsRef<[u8]>,
{
let first_ref = first.as_ref();
let second_ref = second.as_ref();
let first_length = first_ref.len();
let second_length = second_ref.len();
let first_pointer = first_ref.as_ptr() as _;
let second_pointer = second_ref.as_ptr() as _;
unsafe {
sz_edit_distance(
first_pointer,
first_length,
second_pointer,
second_length,
bound,
core::ptr::null(),
)
}
}
pub fn edit_distance_utf8_bounded<F, S>(first: F, second: S, bound: usize) -> usize
where
F: AsRef<[u8]>,
S: AsRef<[u8]>,
{
let first_ref = first.as_ref();
let second_ref = second.as_ref();
let first_length = first_ref.len();
let second_length = second_ref.len();
let first_pointer = first_ref.as_ptr() as _;
let second_pointer = second_ref.as_ptr() as _;
unsafe {
sz_edit_distance_utf8(
first_pointer,
first_length,
second_pointer,
second_length,
bound,
core::ptr::null(),
)
}
}
/// Edit distance between the bytes of `first` and `second`.
///
/// Thin wrapper over [`edit_distance_bounded`] that always passes a bound of 0.
pub fn edit_distance<F, S>(first: F, second: S) -> usize
where
    F: AsRef<[u8]>,
    S: AsRef<[u8]>,
{
    // NOTE(review): 0 is presumably read as "no limit" by the C side — confirm.
    let bound = 0;
    edit_distance_bounded(first, second, bound)
}
/// UTF-8 flavoured edit distance between `first` and `second`.
///
/// Thin wrapper over [`edit_distance_utf8_bounded`] that always passes a bound
/// of 0.
pub fn edit_distance_utf8<F, S>(first: F, second: S) -> usize
where
    F: AsRef<[u8]>,
    S: AsRef<[u8]>,
{
    // NOTE(review): 0 is presumably read as "no limit" by the C side — confirm.
    let bound = 0;
    edit_distance_utf8_bounded(first, second, bound)
}
pub fn hamming_distance_bounded<F, S>(first: F, second: S, bound: usize) -> usize
where
F: AsRef<[u8]>,
S: AsRef<[u8]>,
{
let first_ref = first.as_ref();
let second_ref = second.as_ref();
let first_length = first_ref.len();
let second_length = second_ref.len();
let first_pointer = first_ref.as_ptr() as _;
let second_pointer = second_ref.as_ptr() as _;
unsafe {
sz_hamming_distance(
first_pointer,
first_length,
second_pointer,
second_length,
bound,
)
}
}
pub fn hamming_distance_utf8_bounded<F, S>(first: F, second: S, bound: usize) -> usize
where
F: AsRef<[u8]>,
S: AsRef<[u8]>,
{
let first_ref = first.as_ref();
let second_ref = second.as_ref();
let first_length = first_ref.len();
let second_length = second_ref.len();
let first_pointer = first_ref.as_ptr() as _;
let second_pointer = second_ref.as_ptr() as _;
unsafe {
sz_hamming_distance_utf8(
first_pointer,
first_length,
second_pointer,
second_length,
bound,
)
}
}
/// Hamming distance between the bytes of `first` and `second`.
///
/// Thin wrapper over [`hamming_distance_bounded`] that always passes a bound
/// of 0.
pub fn hamming_distance<F, S>(first: F, second: S) -> usize
where
    F: AsRef<[u8]>,
    S: AsRef<[u8]>,
{
    // NOTE(review): 0 is presumably read as "no limit" by the C side — confirm.
    let bound = 0;
    hamming_distance_bounded(first, second, bound)
}
/// UTF-8 flavoured Hamming distance between `first` and `second`.
///
/// Thin wrapper over [`hamming_distance_utf8_bounded`] that always passes a
/// bound of 0.
pub fn hamming_distance_utf8<F, S>(first: F, second: S) -> usize
where
    F: AsRef<[u8]>,
    S: AsRef<[u8]>,
{
    // NOTE(review): 0 is presumably read as "no limit" by the C side — confirm.
    let bound = 0;
    hamming_distance_utf8_bounded(first, second, bound)
}
pub fn alignment_score<F, S>(first: F, second: S, matrix: [[i8; 256]; 256], gap: i8) -> isize
where
F: AsRef<[u8]>,
S: AsRef<[u8]>,
{
let first_ref = first.as_ref();
let second_ref = second.as_ref();
let first_length = first_ref.len();
let second_length = second_ref.len();
let first_pointer = first_ref.as_ptr() as _;
let second_pointer = second_ref.as_ptr() as _;
unsafe {
sz_alignment_score(
first_pointer,
first_length,
second_pointer,
second_length,
matrix.as_ptr() as _,
gap,
core::ptr::null(),
)
}
}
/// Builds a 256x256 substitution table with 0 on the diagonal (identical
/// bytes) and -1 everywhere else.
pub fn unary_substitution_costs() -> [[i8; 256]; 256] {
    // Start from "every pair costs -1", then zero out the diagonal.
    let mut costs = [[-1i8; 256]; 256];
    for (byte, row) in costs.iter_mut().enumerate() {
        row[byte] = 0;
    }
    costs
}
/// Hands the mutable bytes of `text` and the bytes of `alphabet` to the C
/// routine `sz_generate`, passing null for both generator arguments.
///
/// NOTE(review): presumably overwrites `text` with bytes drawn from
/// `alphabet` — confirm against the C header.
pub fn randomize<T, A>(text: &mut T, alphabet: &A)
where
    T: AsMut<[u8]> + ?Sized,
    A: AsRef<[u8]> + ?Sized,
{
    let output: &mut [u8] = text.as_mut();
    let symbols: &[u8] = alphabet.as_ref();
    // SAFETY: `output` is exclusively borrowed and `symbols` is borrowed for
    // the whole call; each pointer is paired with its slice's own length.
    unsafe {
        sz_generate(
            symbols.as_ptr().cast::<c_void>(),
            symbols.len(),
            output.as_mut_ptr().cast::<c_void>(),
            output.len(),
            core::ptr::null(),
            core::ptr::null_mut(),
        );
    }
}
}
/// Search strategy used by the range iterators in this file.
pub trait Matcher<'a> {
/// Searches `haystack` and returns the offset of a match relative to the
/// start of `haystack`, or `None` when nothing matches.
fn find(&self, haystack: &'a [u8]) -> Option<usize>;
/// Length in bytes of a single match produced by this matcher.
fn needle_length(&self) -> usize;
/// Distance added to a match's start offset to obtain the iterator's next
/// position, depending on whether overlapping matches are wanted and on the
/// iteration direction.
fn skip_length(&self, include_overlaps: bool, is_reverse: bool) -> usize;
}
/// Selects which `sz` search routine a [`Matcher`] dispatches to, together
/// with the needle bytes (or byte set) to search for.
pub enum MatcherType<'a> {
/// Dispatches to `sz::find` with the payload as the needle.
Find(&'a [u8]),
/// Dispatches to `sz::rfind` with the payload as the needle.
RFind(&'a [u8]),
/// Dispatches to `sz::find_char_from` with the payload as the byte set.
FindFirstOf(&'a [u8]),
/// Dispatches to `sz::rfind_char_from` with the payload as the byte set.
FindLastOf(&'a [u8]),
/// Dispatches to `sz::find_char_not_from` with the payload as the byte set.
FindFirstNotOf(&'a [u8]),
/// Dispatches to `sz::rfind_char_not_from` with the payload as the byte set.
FindLastNotOf(&'a [u8]),
}
impl<'a> Matcher<'a> for MatcherType<'a> {
    /// Runs the `sz` routine that corresponds to the variant.
    fn find(&self, haystack: &'a [u8]) -> Option<usize> {
        match *self {
            MatcherType::Find(needle) => sz::find(haystack, needle),
            MatcherType::RFind(needle) => sz::rfind(haystack, needle),
            MatcherType::FindFirstOf(needles) => sz::find_char_from(haystack, needles),
            MatcherType::FindLastOf(needles) => sz::rfind_char_from(haystack, needles),
            MatcherType::FindFirstNotOf(needles) => sz::find_char_not_from(haystack, needles),
            MatcherType::FindLastNotOf(needles) => sz::rfind_char_not_from(haystack, needles),
        }
    }

    /// Substring matchers match the whole needle; byte-set matchers always
    /// match exactly one byte.
    fn needle_length(&self) -> usize {
        match *self {
            MatcherType::Find(needle) | MatcherType::RFind(needle) => needle.len(),
            MatcherType::FindFirstOf(_)
            | MatcherType::FindLastOf(_)
            | MatcherType::FindFirstNotOf(_)
            | MatcherType::FindLastNotOf(_) => 1,
        }
    }

    /// Offset from a match's start to the iterator's next position.
    fn skip_length(&self, include_overlaps: bool, is_reverse: bool) -> usize {
        if include_overlaps {
            if is_reverse {
                // Keep all but the last byte of the match searchable.
                self.needle_length().saturating_sub(1)
            } else {
                1
            }
        } else if is_reverse {
            0
        } else {
            self.needle_length()
        }
    }
}
/// Forward iterator over the matches of a [`MatcherType`] inside a haystack.
///
/// Each item is the sub-slice of the haystack covered by one match.
pub struct RangeMatches<'a> {
    haystack: &'a [u8],
    matcher: MatcherType<'a>,
    // Offset in `haystack` at which the next search starts.
    position: usize,
    include_overlaps: bool,
}

impl<'a> RangeMatches<'a> {
    /// Creates an iterator that starts searching at the beginning of
    /// `haystack`. `include_overlaps` selects how far the search position
    /// advances after each match.
    pub fn new(haystack: &'a [u8], matcher: MatcherType<'a>, include_overlaps: bool) -> Self {
        Self {
            haystack,
            matcher,
            position: 0,
            include_overlaps,
        }
    }
}

impl<'a> Iterator for RangeMatches<'a> {
    type Item = &'a [u8];

    fn next(&mut self) -> Option<Self::Item> {
        let total = self.haystack.len();
        if self.position >= total {
            return None;
        }
        match self.matcher.find(&self.haystack[self.position..]) {
            Some(offset) => {
                // `offset` is relative to the searched tail, not the haystack.
                let match_start = self.position + offset;
                let match_end = match_start + self.matcher.needle_length();
                self.position =
                    match_start + self.matcher.skip_length(self.include_overlaps, false);
                Some(&self.haystack[match_start..match_end])
            }
            None => {
                // Park at the end so later calls return early.
                self.position = total;
                None
            }
        }
    }
}
/// Forward iterator over the pieces of a haystack that lie between matches of
/// a [`MatcherType`].
pub struct RangeSplits<'a> {
    haystack: &'a [u8],
    matcher: MatcherType<'a>,
    // Start of the next piece; set past the end once the tail was yielded.
    position: usize,
    // Offset of the most recent match, if any match has been seen.
    last_match: Option<usize>,
}

impl<'a> RangeSplits<'a> {
    /// Creates a splitter positioned at the start of `haystack`.
    pub fn new(haystack: &'a [u8], matcher: MatcherType<'a>) -> Self {
        Self {
            haystack,
            matcher,
            position: 0,
            last_match: None,
        }
    }
}

impl<'a> Iterator for RangeSplits<'a> {
    type Item = &'a [u8];

    fn next(&mut self) -> Option<Self::Item> {
        let total = self.haystack.len();
        if self.position > total {
            return None;
        }
        let piece_start = self.position;
        match self.matcher.find(&self.haystack[piece_start..]) {
            Some(offset) => {
                let delimiter_at = piece_start + offset;
                self.last_match = Some(delimiter_at);
                self.position = delimiter_at + self.matcher.needle_length();
                Some(&self.haystack[piece_start..delimiter_at])
            }
            // No further delimiter: yield the tail when there is unread input,
            // or when a delimiter was seen earlier (the tail may then be empty).
            None if piece_start < total || self.last_match.is_some() => {
                self.position = total + 1;
                Some(&self.haystack[piece_start..])
            }
            None => None,
        }
    }
}
/// Backward iterator over the matches of a [`MatcherType`] inside a haystack.
///
/// Each item is the sub-slice of the haystack covered by one match.
pub struct RangeRMatches<'a> {
    haystack: &'a [u8],
    matcher: MatcherType<'a>,
    // Exclusive end of the prefix of `haystack` that is still searched.
    position: usize,
    include_overlaps: bool,
}

impl<'a> RangeRMatches<'a> {
    /// Creates an iterator whose search window initially covers all of
    /// `haystack`. `include_overlaps` selects how far the window shrinks
    /// after each match.
    pub fn new(haystack: &'a [u8], matcher: MatcherType<'a>, include_overlaps: bool) -> Self {
        Self {
            haystack,
            matcher,
            position: haystack.len(),
            include_overlaps,
        }
    }
}

impl<'a> Iterator for RangeRMatches<'a> {
    type Item = &'a [u8];

    fn next(&mut self) -> Option<Self::Item> {
        if self.position == 0 {
            return None;
        }
        let window = &self.haystack[..self.position];
        // The window starts at offset 0, so the matcher's result is already
        // an absolute haystack offset.
        let match_start = self.matcher.find(window)?;
        let match_end = match_start + self.matcher.needle_length();
        let found = &self.haystack[match_start..match_end];
        self.position = match_start + self.matcher.skip_length(self.include_overlaps, true);
        Some(found)
    }
}
/// Backward iterator over the pieces of a haystack that lie between matches of
/// a [`MatcherType`], yielded from the end of the haystack toward its start.
///
/// A delimiter at the very start of the haystack produces a final empty
/// piece, mirroring how `RangeSplits` reports an empty piece after a trailing
/// delimiter. An empty haystack yields nothing.
pub struct RangeRSplits<'a> {
    haystack: &'a [u8],
    matcher: MatcherType<'a>,
    // Exclusive end of the part of `haystack` that has not been yielded yet.
    position: usize,
    // Set once the left-most piece has been yielded. A separate flag is needed
    // because `position == 0` alone cannot tell "done" apart from "an empty
    // leading piece is still pending".
    finished: bool,
}

impl<'a> RangeRSplits<'a> {
    /// Creates a splitter whose unread region initially covers all of
    /// `haystack`.
    pub fn new(haystack: &'a [u8], matcher: MatcherType<'a>) -> Self {
        Self {
            haystack,
            matcher,
            position: haystack.len(),
            // Keep the existing contract: an empty haystack has no pieces.
            finished: haystack.is_empty(),
        }
    }
}

impl<'a> Iterator for RangeRSplits<'a> {
    type Item = &'a [u8];

    fn next(&mut self) -> Option<Self::Item> {
        if self.finished {
            return None;
        }
        let end = self.position;
        // With nothing left to search, skip the matcher call and fall through
        // to yielding the (empty) leading piece.
        let found = if end == 0 {
            None
        } else {
            self.matcher.find(&self.haystack[..end])
        };
        match found {
            Some(index) => {
                let start = index + self.matcher.needle_length();
                // The next search covers everything before this delimiter.
                self.position = index;
                Some(&self.haystack[start..end])
            }
            None => {
                // No delimiter remains: the rest of the prefix is the last piece.
                self.finished = true;
                Some(&self.haystack[..end])
            }
        }
    }
}
/// Extension trait exposing the `sz` search, distance and iteration helpers as
/// methods. `N` is the needle type; the iterator-returning methods borrow both
/// `self` and the needle for the lifetime `'a`.
pub trait StringZilla<'a, N>
where
N: AsRef<[u8]> + 'a,
{
/// Method form of `sz::find`.
fn sz_find(&self, needle: N) -> Option<usize>;
/// Method form of `sz::rfind`.
fn sz_rfind(&self, needle: N) -> Option<usize>;
/// Method form of `sz::find_char_from`.
fn sz_find_char_from(&self, needles: N) -> Option<usize>;
/// Method form of `sz::rfind_char_from`.
fn sz_rfind_char_from(&self, needles: N) -> Option<usize>;
/// Method form of `sz::find_char_not_from`.
fn sz_find_char_not_from(&self, needles: N) -> Option<usize>;
/// Method form of `sz::rfind_char_not_from`.
fn sz_rfind_char_not_from(&self, needles: N) -> Option<usize>;
/// Method form of `sz::edit_distance`.
fn sz_edit_distance(&self, other: N) -> usize;
/// Method form of `sz::alignment_score`.
fn sz_alignment_score(&self, other: N, matrix: [[i8; 256]; 256], gap: i8) -> isize;
/// Forward iterator over the matches of `needle`.
fn sz_matches(&'a self, needle: &'a N) -> RangeMatches<'a>;
/// Backward iterator over the matches of `needle`.
fn sz_rmatches(&'a self, needle: &'a N) -> RangeRMatches<'a>;
/// Forward iterator over the pieces separated by `needle`.
fn sz_splits(&'a self, needle: &'a N) -> RangeSplits<'a>;
/// Backward iterator over the pieces separated by `needle`.
fn sz_rsplits(&'a self, needle: &'a N) -> RangeRSplits<'a>;
/// Forward iterator over single bytes found with the set `needles`.
fn sz_find_first_of(&'a self, needles: &'a N) -> RangeMatches<'a>;
/// Backward iterator over single bytes found with the set `needles`.
fn sz_find_last_of(&'a self, needles: &'a N) -> RangeRMatches<'a>;
/// Forward iterator over single bytes found with the "not from" search on
/// the set `needles`.
fn sz_find_first_not_of(&'a self, needles: &'a N) -> RangeMatches<'a>;
/// Backward iterator over single bytes found with the "not from" search on
/// the set `needles`.
fn sz_find_last_not_of(&'a self, needles: &'a N) -> RangeRMatches<'a>;
}
/// Blanket implementation for every type that can be viewed as bytes. Each
/// method forwards to the matching `sz` function or builds the matching range
/// iterator; all iterators are created with overlapping matches enabled.
impl<'a, T, N> StringZilla<'a, N> for T
where
    T: AsRef<[u8]> + ?Sized,
    N: AsRef<[u8]> + 'a,
{
    fn sz_find(&self, needle: N) -> Option<usize> {
        let bytes: &[u8] = self.as_ref();
        sz::find(bytes, needle)
    }

    fn sz_rfind(&self, needle: N) -> Option<usize> {
        let bytes: &[u8] = self.as_ref();
        sz::rfind(bytes, needle)
    }

    fn sz_find_char_from(&self, needles: N) -> Option<usize> {
        let bytes: &[u8] = self.as_ref();
        sz::find_char_from(bytes, needles)
    }

    fn sz_rfind_char_from(&self, needles: N) -> Option<usize> {
        let bytes: &[u8] = self.as_ref();
        sz::rfind_char_from(bytes, needles)
    }

    fn sz_find_char_not_from(&self, needles: N) -> Option<usize> {
        let bytes: &[u8] = self.as_ref();
        sz::find_char_not_from(bytes, needles)
    }

    fn sz_rfind_char_not_from(&self, needles: N) -> Option<usize> {
        let bytes: &[u8] = self.as_ref();
        sz::rfind_char_not_from(bytes, needles)
    }

    fn sz_edit_distance(&self, other: N) -> usize {
        let bytes: &[u8] = self.as_ref();
        sz::edit_distance(bytes, other)
    }

    fn sz_alignment_score(&self, other: N, matrix: [[i8; 256]; 256], gap: i8) -> isize {
        let bytes: &[u8] = self.as_ref();
        sz::alignment_score(bytes, other, matrix, gap)
    }

    fn sz_matches(&'a self, needle: &'a N) -> RangeMatches<'a> {
        let matcher = MatcherType::Find(needle.as_ref());
        RangeMatches::new(self.as_ref(), matcher, true)
    }

    fn sz_rmatches(&'a self, needle: &'a N) -> RangeRMatches<'a> {
        let matcher = MatcherType::RFind(needle.as_ref());
        RangeRMatches::new(self.as_ref(), matcher, true)
    }

    fn sz_splits(&'a self, needle: &'a N) -> RangeSplits<'a> {
        let matcher = MatcherType::Find(needle.as_ref());
        RangeSplits::new(self.as_ref(), matcher)
    }

    fn sz_rsplits(&'a self, needle: &'a N) -> RangeRSplits<'a> {
        let matcher = MatcherType::RFind(needle.as_ref());
        RangeRSplits::new(self.as_ref(), matcher)
    }

    fn sz_find_first_of(&'a self, needles: &'a N) -> RangeMatches<'a> {
        let matcher = MatcherType::FindFirstOf(needles.as_ref());
        RangeMatches::new(self.as_ref(), matcher, true)
    }

    fn sz_find_last_of(&'a self, needles: &'a N) -> RangeRMatches<'a> {
        let matcher = MatcherType::FindLastOf(needles.as_ref());
        RangeRMatches::new(self.as_ref(), matcher, true)
    }

    fn sz_find_first_not_of(&'a self, needles: &'a N) -> RangeMatches<'a> {
        let matcher = MatcherType::FindFirstNotOf(needles.as_ref());
        RangeMatches::new(self.as_ref(), matcher, true)
    }

    fn sz_find_last_not_of(&'a self, needles: &'a N) -> RangeRMatches<'a> {
        let matcher = MatcherType::FindLastNotOf(needles.as_ref());
        RangeRMatches::new(self.as_ref(), matcher, true)
    }
}
/// Extension trait for byte buffers that can be modified in place. `A` is the
/// type of the alphabet argument.
pub trait MutableStringZilla<A>
where
A: AsRef<[u8]>,
{
/// Method form of `sz::randomize`: passes this buffer and `alphabet` to it.
fn sz_randomize(&mut self, alphabet: A);
}
/// Blanket implementation for every type that exposes its contents as a
/// mutable byte slice.
impl<T, A> MutableStringZilla<A> for T
where
    T: AsMut<[u8]>,
    A: AsRef<[u8]>,
{
    fn sz_randomize(&mut self, alphabet: A) {
        // Both arguments are reduced to plain byte slices before the call.
        sz::randomize::<[u8], [u8]>(self.as_mut(), alphabet.as_ref());
    }
}
#[cfg(test)]
mod tests {
use std::borrow::Cow;
use crate::sz;
use crate::MutableStringZilla;
use crate::StringZilla;
#[test]
fn hamming() {
    // Unbounded, byte-level cases: (first, second, expected distance).
    let byte_cases = [
        ("hello", "hello", 0),
        ("hello", "hell", 1),
        ("abc", "adc", 1),
    ];
    for &(first, second, expected) in byte_cases.iter() {
        assert_eq!(sz::hamming_distance(first, second), expected);
    }
    // Bounded variant: the expected result equals the bound that was passed.
    assert_eq!(sz::hamming_distance_bounded("abcdefgh", "ABCDEFGH", 2), 2);
    // UTF-8 variant on multi-byte characters.
    assert_eq!(sz::hamming_distance_utf8("αβγδ", "αγγδ"), 1);
}
#[test]
fn levenshtein() {
    // Same pair in both argument orders: the distance is symmetric. (The
    // second line used to be an exact duplicate of the first.)
    assert_eq!(sz::edit_distance("hello", "hell"), 1);
    assert_eq!(sz::edit_distance("hell", "hello"), 1);
    assert_eq!(sz::edit_distance("abc", ""), 3);
    assert_eq!(sz::edit_distance("abc", "ac"), 1);
    assert_eq!(sz::edit_distance("abc", "a_bc"), 1);
    assert_eq!(sz::edit_distance("abc", "adc"), 1);
    assert_eq!(sz::edit_distance("fitting", "kitty"), 4);
    assert_eq!(sz::edit_distance("smitten", "mitten"), 1);
    assert_eq!(sz::edit_distance("ggbuzgjux{}l", "gbuzgjux{}l"), 1);
    assert_eq!(sz::edit_distance("abcdefgABCDEFG", "ABCDEFGabcdefg"), 14);
    // Bounded variant: the expected result equals the bound that was passed.
    assert_eq!(sz::edit_distance_bounded("fitting", "kitty", 2), 2);
    // UTF-8 variant: "ç" versus "c" counts as a single edit.
    assert_eq!(sz::edit_distance_utf8("façade", "facade"), 1);
}
#[test]
fn needleman() {
    let costs = sz::unary_substitution_costs();
    let gap = -1;
    // (first, second, expected score) with unary costs and a gap of -1.
    let expectations: [(&str, &str, isize); 4] = [
        ("listen", "silent", -4),
        ("abcdefgABCDEFG", "ABCDEFGabcdefg", -14),
        ("hello", "hello", 0),
        ("hello", "hell", -1),
    ];
    for &(first, second, score) in expectations.iter() {
        assert_eq!(sz::alignment_score(first, second, costs, gap), score);
    }
}
#[test]
fn search() {
    let my_string: String = String::from("Hello, world!");
    let my_str: &str = my_string.as_str();
    let my_cow_str: Cow<'_, str> = Cow::from(&my_string);

    // Free functions.
    assert_eq!(sz::find("Hello, world!", "world"), Some(7));
    assert_eq!(sz::rfind("Hello, world!", "world"), Some(7));

    // Extension methods on `String`.
    assert_eq!(my_string.sz_find("world"), Some(7));
    assert_eq!(my_string.sz_rfind("world"), Some(7));
    assert_eq!(my_string.sz_find_char_from("world"), Some(2));
    assert_eq!(my_string.sz_rfind_char_from("world"), Some(11));
    assert_eq!(my_string.sz_find_char_not_from("world"), Some(0));
    assert_eq!(my_string.sz_rfind_char_not_from("world"), Some(12));

    // Extension methods on `&str`. The second assertion used to repeat
    // `sz_find`, leaving `sz_rfind` untested for this receiver type.
    assert_eq!(my_str.sz_find("world"), Some(7));
    assert_eq!(my_str.sz_rfind("world"), Some(7));
    assert_eq!(my_str.sz_find_char_from("world"), Some(2));
    assert_eq!(my_str.sz_rfind_char_from("world"), Some(11));
    assert_eq!(my_str.sz_find_char_not_from("world"), Some(0));
    assert_eq!(my_str.sz_rfind_char_not_from("world"), Some(12));

    // Extension methods on a `Cow<str>` viewed through `as_ref()`; same
    // `sz_find`/`sz_rfind` correction as above.
    assert_eq!(my_cow_str.as_ref().sz_find("world"), Some(7));
    assert_eq!(my_cow_str.as_ref().sz_rfind("world"), Some(7));
    assert_eq!(my_cow_str.as_ref().sz_find_char_from("world"), Some(2));
    assert_eq!(my_cow_str.as_ref().sz_rfind_char_from("world"), Some(11));
    assert_eq!(my_cow_str.as_ref().sz_find_char_not_from("world"), Some(0));
    assert_eq!(
        my_cow_str.as_ref().sz_rfind_char_not_from("world"),
        Some(12)
    );
}
#[test]
fn randomize() {
    let alphabet: &[u8] = b"abcd";
    let mut text: Vec<u8> = vec![0; 10];
    text.sz_randomize(alphabet);
    // Every byte of the buffer must now be one of the four alphabet bytes.
    assert!(text.iter().all(|byte| alphabet.contains(byte)));
}
// Tests for the range iterators, driven both through the `StringZilla`
// extension methods and through the iterator constructors directly.
mod search_split_iterators {
use super::*;
use crate::{MatcherType, RangeMatches, RangeRMatches};
// Two disjoint occurrences are both reported by the forward iterator.
#[test]
fn test_matches() {
let haystack = b"hello world hello universe";
let needle = b"hello";
let matches: Vec<_> = haystack.sz_matches(needle).collect();
assert_eq!(matches, vec![b"hello", b"hello"]);
}
// Same input through the backward iterator.
#[test]
fn test_rmatches() {
let haystack = b"hello world hello universe";
let needle = b"hello";
let matches: Vec<_> = haystack.sz_rmatches(needle).collect();
assert_eq!(matches, vec![b"hello", b"hello"]);
}
// Only "," is a delimiter here; ";" stays inside the second piece.
#[test]
fn test_splits() {
let haystack = b"alpha,beta;gamma";
let needle = b",";
let splits: Vec<_> = haystack.sz_splits(needle).collect();
assert_eq!(splits, vec![&b"alpha"[..], &b"beta;gamma"[..]]);
}
// Backward splitting yields the right-hand piece first.
#[test]
fn test_rsplits() {
let haystack = b"alpha,beta;gamma";
let needle = b";";
let splits: Vec<_> = haystack.sz_rsplits(needle).collect();
assert_eq!(splits, vec![&b"gamma"[..], &b"alpha,beta"[..]]);
}
// Adjacent and trailing delimiters produce empty pieces.
#[test]
fn test_splits_with_empty_parts() {
let haystack = b"a,,b,";
let needle = b",";
let splits: Vec<_> = haystack.sz_splits(needle).collect();
assert_eq!(splits, vec![b"a", &b""[..], b"b", &b""[..]]);
}
// `sz_matches` builds its iterator with overlaps enabled, hence three hits.
#[test]
fn test_matches_with_overlaps() {
let haystack = b"aaaa";
let needle = b"aa";
let matches: Vec<_> = haystack.sz_matches(needle).collect();
assert_eq!(matches, vec![b"aa", b"aa", b"aa"]);
}
// Splitting works on raw bytes, so multi-byte UTF-8 text passes through intact.
#[test]
fn test_splits_with_utf8() {
let haystack = "こんにちは,世界".as_bytes();
let needle = b",";
let splits: Vec<_> = haystack.sz_splits(needle).collect();
assert_eq!(splits, vec!["こんにちは".as_bytes(), "世界".as_bytes()]);
}
// Bytes from the set {o, r}, in order of appearance.
#[test]
fn test_find_first_of() {
let haystack = b"hello world";
let needles = b"or";
let matches: Vec<_> = haystack.sz_find_first_of(needles).collect();
assert_eq!(matches, vec![b"o", b"o", b"r"]);
}
// Bytes from the set {o, r}, from the end toward the start.
#[test]
fn test_find_last_of() {
let haystack = b"hello world";
let needles = b"or";
let matches: Vec<_> = haystack.sz_find_last_of(needles).collect();
assert_eq!(matches, vec![b"r", b"o", b"o"]);
}
// Bytes outside the set {a, b}, in order of appearance.
#[test]
fn test_find_first_not_of() {
let haystack = b"aabbbcccd";
let needles = b"ab";
let matches: Vec<_> = haystack.sz_find_first_not_of(needles).collect();
assert_eq!(matches, vec![b"c", b"c", b"c", b"d"]);
}
// Bytes outside the set {c, d}, from the end toward the start.
#[test]
fn test_find_last_not_of() {
let haystack = b"aabbbcccd";
let needles = b"cd";
let matches: Vec<_> = haystack.sz_find_last_not_of(needles).collect();
assert_eq!(matches, vec![b"b", b"b", b"b", b"a", b"a"]);
}
// An empty byte set matches nothing.
#[test]
fn test_find_first_of_empty_needles() {
let haystack = b"hello world";
let needles = b"";
let matches: Vec<_> = haystack.sz_find_first_of(needles).collect();
assert_eq!(matches, Vec::<&[u8]>::new());
}
// An empty haystack yields no matches.
#[test]
fn test_find_last_of_empty_haystack() {
let haystack = b"";
let needles = b"abc";
let matches: Vec<_> = haystack.sz_find_last_of(needles).collect();
assert_eq!(matches, Vec::<&[u8]>::new());
}
// Every haystack byte is in the set, so the "not of" search finds nothing.
#[test]
fn test_find_first_not_of_all_matching() {
let haystack = b"aaabbbccc";
let needles = b"abc";
let matches: Vec<_> = haystack.sz_find_first_not_of(needles).collect();
assert_eq!(matches, Vec::<&[u8]>::new());
}
// No haystack byte is in the set, so every byte is reported, last to first.
#[test]
fn test_find_last_not_of_all_not_matching() {
let haystack = b"hello world";
let needles = b"xyz";
let matches: Vec<_> = haystack.sz_find_last_not_of(needles).collect();
assert_eq!(
matches,
vec![b"d", b"l", b"r", b"o", b"w", b" ", b"o", b"l", b"l", b"e", b"h"]
);
}
// Direct construction, overlaps enabled: matches at offsets 0, 1 and 2.
#[test]
fn test_range_matches_overlapping() {
let haystack = b"aaaa";
let matcher = MatcherType::Find(b"aa");
let matches: Vec<_> = RangeMatches::new(haystack, matcher, true).collect();
assert_eq!(matches, vec![&b"aa"[..], &b"aa"[..], &b"aa"[..]]);
}
// Direct construction, overlaps disabled: only two disjoint matches.
#[test]
fn test_range_matches_non_overlapping() {
let haystack = b"aaaa";
let matcher = MatcherType::Find(b"aa");
let matches: Vec<_> = RangeMatches::new(haystack, matcher, false).collect();
assert_eq!(matches, vec![&b"aa"[..], &b"aa"[..]]);
}
// Backward counterpart of the overlapping case.
#[test]
fn test_range_rmatches_overlapping() {
let haystack = b"aaaa";
let matcher = MatcherType::RFind(b"aa");
let matches: Vec<_> = RangeRMatches::new(haystack, matcher, true).collect();
assert_eq!(matches, vec![&b"aa"[..], &b"aa"[..], &b"aa"[..]]);
}
// Backward counterpart of the non-overlapping case.
#[test]
fn test_range_rmatches_non_overlapping() {
let haystack = b"aaaa";
let matcher = MatcherType::RFind(b"aa");
let matches: Vec<_> = RangeRMatches::new(haystack, matcher, false).collect();
assert_eq!(matches, vec![&b"aa"[..], &b"aa"[..]]);
}
}
}