#![deny(missing_docs)]
mod finder;
use finder::*;
use rayon::prelude::*;
use std::num::NonZeroUsize;
pub trait CommonStr {
fn common_prefix(&self) -> Option<String> {
self.common_prefix_ref().map(|s| s.to_string())
}
fn common_suffix(&self) -> Option<String> {
self.common_suffix_ref().map(|s| s.to_string())
}
fn common_prefix_len(&self) -> Option<NonZeroUsize> {
self.common_prefix_ref()
.map(|s| unsafe { NonZeroUsize::new_unchecked(s.len()) })
}
fn common_suffix_len(&self) -> Option<NonZeroUsize> {
self.common_suffix_ref()
.map(|s| unsafe { NonZeroUsize::new_unchecked(s.len()) })
}
fn common_prefix_ref(&self) -> Option<&str>;
fn common_suffix_ref(&self) -> Option<&str>;
}
pub trait CommonRaw<T: Clone> {
fn common_prefix_raw(&self) -> Option<Vec<T>> {
self.common_prefix_raw_ref().map(|s| s.to_vec())
}
fn common_suffix_raw(&self) -> Option<Vec<T>> {
self.common_suffix_raw_ref().map(|s| s.to_vec())
}
fn common_prefix_raw_len(&self) -> Option<NonZeroUsize> {
self.common_prefix_raw_ref()
.map(|s| unsafe { NonZeroUsize::new_unchecked(s.len()) })
}
fn common_suffix_raw_len(&self) -> Option<NonZeroUsize> {
self.common_suffix_raw_ref()
.map(|s| unsafe { NonZeroUsize::new_unchecked(s.len()) })
}
fn common_prefix_raw_ref(&self) -> Option<&[T]>;
fn common_suffix_raw_ref(&self) -> Option<&[T]>;
}
impl<C: ?Sized, T> CommonStr for C
where
for<'a> &'a C: IntoParallelIterator<Item = &'a T>,
T: AsRef<str> + Sync,
{
fn common_prefix_ref(&self) -> Option<&str> {
find_common::<_, StringPrefix, _, _>(self)
}
fn common_suffix_ref(&self) -> Option<&str> {
find_common::<_, StringSuffix, _, _>(self)
}
}
impl<C: ?Sized, T, U> CommonRaw<U> for C
where
for<'a> &'a C: IntoParallelIterator<Item = &'a T>,
T: AsRef<[U]> + Sync,
U: Clone + Eq + Sync,
{
fn common_prefix_raw_ref(&self) -> Option<&[U]> {
find_common::<_, GenericPrefix, _, _>(self)
}
fn common_suffix_raw_ref(&self) -> Option<&[U]> {
find_common::<_, GenericSuffix, _, _>(self)
}
}
fn find_common<C: ?Sized, F, T, U>(collection: &C) -> Option<&U>
where
for<'a> &'a C: IntoParallelIterator<Item = &'a T>,
F: Finder<U>,
T: AsRef<U> + Sync,
U: ?Sized + Sync,
{
collection
.into_par_iter()
.try_fold(
|| None,
|previous, current| {
let cur_ref = current.as_ref();
match previous {
Some(prev) => F::common(prev, cur_ref).map(Some),
None => Some(Some(cur_ref)),
}
},
)
.try_reduce(
|| None,
|a, b| match (a, b) {
(Some(a), Some(b)) => F::common(a, b).map(Some),
(Some(common), None) | (None, Some(common)) => Some(Some(common)),
(None, None) => None,
},
)
.flatten()
}
#[cfg(test)]
mod tests {
use super::{CommonRaw, CommonStr};
use std::hint::black_box;
use std::iter;
use ya_rand::*;
const BASE_LEN: usize = 19;
const COMMON: &str = "this is just a simple sentence";
const EXT_LEN: usize = 13;
const TOTAL_LEN: usize = BASE_LEN + EXT_LEN;
const VEC_LEN: usize = 1 << 15;
#[test]
fn str_prefix_sanity() {
let v = black_box(vec![COMMON.to_string(); BASE_LEN]);
let common = v.common_prefix_ref().unwrap();
assert_eq!(common, COMMON);
assert_eq!(common.len(), COMMON.len());
}
#[test]
fn str_suffix_sanity() {
let v = black_box(vec![COMMON.to_string(); BASE_LEN]);
let common = v.common_suffix_ref().unwrap();
assert_eq!(common, COMMON);
assert_eq!(common.len(), COMMON.len());
}
#[test]
fn raw_prefix_sanity() {
let v = black_box(vec![COMMON.as_bytes().to_vec(); BASE_LEN]);
let common = v.common_prefix_raw_ref().unwrap();
assert_eq!(common, COMMON.as_bytes());
assert_eq!(common.len(), COMMON.len());
}
#[test]
fn raw_suffix_sanity() {
let v = black_box(vec![COMMON.as_bytes().to_vec(); BASE_LEN]);
let common = v.common_suffix_raw_ref().unwrap();
assert_eq!(common, COMMON.as_bytes());
assert_eq!(common.len(), COMMON.len());
}
#[test]
fn misc() {
let input: [String; 0] = [];
let prefix = input.common_prefix();
assert_eq!(prefix, None);
let suffix = input.common_suffix();
assert_eq!(suffix, None);
let input = ["just a single entry"];
let prefix = input.common_prefix().unwrap();
assert_eq!(prefix, input[0]);
let suffix = input.common_suffix().unwrap();
assert_eq!(suffix, input[0]);
let input = ["foobar", "fooqux", "foodle", "fookys"];
let prefix = input.common_prefix().unwrap();
assert_eq!(prefix, "foo");
let suffix = input.common_suffix();
assert_eq!(suffix, None);
let input = ["café", "caféine"];
let prefix = input.common_prefix().unwrap();
assert_eq!(prefix, "café");
let suffix = input.common_suffix();
assert_eq!(suffix, None);
let input = ["äbc", "âbc"];
let prefix = input.common_prefix();
assert_eq!(prefix, None);
let suffix = input.common_suffix().unwrap();
assert_eq!(suffix, "bc");
let input = ["abc€", "xyz€"];
let prefix = input.common_prefix();
assert_eq!(prefix, None);
let suffix = input.common_suffix().unwrap();
assert_eq!(suffix, "€");
let input = ["abcä", "defâ"];
let prefix = input.common_prefix();
assert_eq!(prefix, None);
let suffix = input.common_suffix();
assert_eq!(suffix, None);
let input = ["some thingy", "nothing"];
let prefix = input.common_prefix();
assert_eq!(prefix, None);
let suffix = input.common_suffix();
assert_eq!(suffix, None);
let input = ["-lol-", "_lol_"];
let prefix = input.common_prefix();
assert_eq!(prefix, None);
let suffix = input.common_suffix();
assert_eq!(suffix, None);
let input = ["a🤖b", "a🤡b"];
let prefix = input.common_prefix().unwrap();
assert_eq!(prefix, "a");
let suffix = input.common_suffix().unwrap();
assert_eq!(suffix, "b");
let input = ["résumé", "résister"];
let prefix = input.common_prefix().unwrap();
assert_eq!(prefix, "rés");
let suffix = input.common_suffix();
assert_eq!(suffix, None);
let input = ["abcédef", "xyzèdef"];
let prefix = input.common_prefix();
assert_eq!(prefix, None);
let suffix = input.common_suffix().unwrap();
assert_eq!(suffix, "def");
let input = ["Goodbye 👋", "Farewell 👋"];
let prefix = input.common_prefix();
assert_eq!(prefix, None);
let suffix = input.common_suffix().unwrap();
assert_eq!(suffix, " 👋");
let input = ["Family: 👨👩👧", "Group: 👨👩👧"];
let prefix = input.common_prefix();
assert_eq!(prefix, None);
let suffix = input.common_suffix().unwrap();
assert_eq!(suffix, ": 👨👩👧");
let input = ["just some words 世界", "世界"];
let prefix = input.common_prefix();
assert_eq!(prefix, None);
let suffix = input.common_suffix().unwrap();
assert_eq!(suffix, "世界");
let input = ["tests😀", "best😀"];
let prefix = input.common_prefix();
assert_eq!(prefix, None);
let suffix = input.common_suffix().unwrap();
assert_eq!(suffix, "😀");
let input = ["wowie_bruhther_clap", "wowie-lol-clap", "wowie_xd_clap"];
let prefix = input.common_prefix().unwrap();
assert_eq!(prefix, "wowie");
let suffix = input.common_suffix().unwrap();
assert_eq!(suffix, "clap");
}
#[test]
fn prefix_ascii() {
let mut rng = new_rng();
let base = new_string_with::<BASE_LEN, _>(|| random_ascii(&mut rng));
let mut strings = vec![String::with_capacity(TOTAL_LEN); VEC_LEN];
strings.iter_mut().for_each(|s| {
let ext = new_string_with::<EXT_LEN, _>(|| random_ascii(&mut rng));
s.push_str(&base);
s.push_str(&ext);
});
let prefix = strings.common_prefix().unwrap();
assert_eq!(base, prefix);
}
#[test]
fn suffix_ascii() {
let mut rng = new_rng();
let base = new_string_with::<BASE_LEN, _>(|| random_ascii(&mut rng));
let mut strings = vec![String::with_capacity(TOTAL_LEN); VEC_LEN];
strings.iter_mut().for_each(|s| {
let ext = new_string_with::<EXT_LEN, _>(|| random_ascii(&mut rng));
s.push_str(&ext);
s.push_str(&base);
});
let suffix = strings.common_suffix().unwrap();
assert_eq!(base, suffix);
}
fn random_ascii(rng: &mut ShiroRng) -> char {
rng.bits(7) as u8 as char
}
#[test]
fn prefix_char() {
let mut rng = new_rng();
let base = new_string_with::<BASE_LEN, _>(|| random_char(&mut rng));
let mut strings = vec![String::with_capacity(TOTAL_LEN * 4); VEC_LEN];
strings.iter_mut().for_each(|s| {
let ext = new_string_with::<EXT_LEN, _>(|| random_char(&mut rng));
s.push_str(&base);
s.push_str(&ext);
});
let prefix = strings.common_prefix().unwrap();
assert_eq!(base, prefix);
}
#[test]
fn suffix_char() {
let mut rng = new_rng();
let base = new_string_with::<BASE_LEN, _>(|| random_char(&mut rng));
let mut strings = vec![String::with_capacity(TOTAL_LEN * 4); VEC_LEN];
strings.iter_mut().for_each(|s| {
let ext = new_string_with::<EXT_LEN, _>(|| random_char(&mut rng));
s.push_str(&ext);
s.push_str(&base);
});
let suffix = strings.common_suffix().unwrap();
assert_eq!(base, suffix);
}
fn random_char(rng: &mut ShiroRng) -> char {
loop {
let val = rng.bits(21) as u32;
match char::from_u32(val) {
Some(c) => return c,
None => continue,
}
}
}
fn new_string_with<const SIZE: usize, F>(f: F) -> String
where
F: FnMut() -> char,
{
iter::repeat_with(f).take(SIZE).collect()
}
#[test]
fn prefix_generic() {
let mut rng = new_rng();
let base = new_vec_with::<BASE_LEN, _>(|| rng.u64());
let mut nested = vec![Vec::new(); VEC_LEN];
nested.iter_mut().for_each(|cur| {
let ext = new_vec_with::<EXT_LEN, _>(|| rng.u64());
cur.extend_from_slice(&base);
cur.extend_from_slice(&ext);
});
let prefix = nested.common_prefix_raw().unwrap();
assert_eq!(base, prefix);
}
#[test]
fn suffix_generic() {
let mut rng = new_rng();
let base = new_vec_with::<BASE_LEN, _>(|| rng.u64());
let mut nested = vec![Vec::new(); VEC_LEN];
nested.iter_mut().for_each(|cur| {
let ext = new_vec_with::<EXT_LEN, _>(|| rng.u64());
cur.extend_from_slice(&ext);
cur.extend_from_slice(&base);
});
let prefix = nested.common_suffix_raw().unwrap();
assert_eq!(base, prefix);
}
fn new_vec_with<const SIZE: usize, F>(f: F) -> Vec<u64>
where
F: FnMut() -> u64,
{
iter::repeat_with(f).take(SIZE).collect()
}
}