use std::borrow::Cow;
use std::cmp::Ordering;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::str;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
#[cfg(target_arch = "aarch64")]
use std::arch::aarch64::*;
/// A cheap, `Copy`-able view over a borrowed byte string.
///
/// The bytes are not required to be valid UTF-8: [`FastStr::as_str`] validates on demand, while
/// slicing, searching, comparison and hashing all work on the raw bytes.
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct FastStr<'a> {
    /// The viewed bytes; this type never copies or mutates them.
    data: &'a [u8],
}

impl<'a> FastStr<'a> {
    /// Wraps a byte slice without copying or validating it.
    #[inline]
    pub fn new(data: &'a [u8]) -> Self {
        Self { data }
    }

    /// Wraps the bytes of `s` without copying.
    #[inline]
    pub fn from_string(s: &'a str) -> Self {
        Self { data: s.as_bytes() }
    }

    /// Builds a view from a raw pointer and a byte count.
    ///
    /// # Safety
    ///
    /// `ptr` and `len` are passed straight to [`std::slice::from_raw_parts`]; the caller must
    /// uphold that function's contract for the whole of `'a`.
    #[inline]
    pub unsafe fn from_raw_parts(ptr: *const u8, len: usize) -> Self {
        Self {
            // SAFETY: guaranteed by the caller, see `# Safety`.
            data: unsafe { std::slice::from_raw_parts(ptr, len) },
        }
    }

    /// Number of bytes in the view.
    #[inline]
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// `true` when the view contains no bytes.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Returns the viewed bytes.
    ///
    /// The slice borrows from the original data (`'a`), not from this wrapper, so it may outlive
    /// the `FastStr` value it was obtained from.
    #[inline]
    pub fn as_bytes(&self) -> &'a [u8] {
        self.data
    }

    /// Returns the bytes as `&str`, or `None` when they are not valid UTF-8.
    #[inline]
    pub fn as_str(&self) -> Option<&'a str> {
        str::from_utf8(self.data).ok()
    }

    /// Returns the bytes as `&str` without validating them.
    ///
    /// # Safety
    ///
    /// The viewed bytes must be valid UTF-8.
    #[inline]
    pub unsafe fn as_str_unchecked(&self) -> &'a str {
        // SAFETY: the caller guarantees the bytes are valid UTF-8.
        unsafe { str::from_utf8_unchecked(self.data) }
    }

    /// Pointer to the first byte of the view.
    #[inline]
    pub fn as_ptr(&self) -> *const u8 {
        self.data.as_ptr()
    }

    /// Returns the byte at `index`, or `None` when `index` is out of range.
    #[inline]
    pub fn get_byte(&self, index: usize) -> Option<u8> {
        self.data.get(index).copied()
    }

    /// Returns the byte at `index` without a bounds check (checked in debug builds only).
    ///
    /// # Safety
    ///
    /// `index` must be less than [`FastStr::len`].
    #[inline]
    pub unsafe fn get_byte_unchecked(&self, index: usize) -> u8 {
        debug_assert!(index < self.len());
        // SAFETY: the caller guarantees `index < self.len()`.
        unsafe { *self.data.get_unchecked(index) }
    }

    /// Returns up to `len` bytes starting at `start`.
    ///
    /// Both ends are clamped to the view, so an out-of-range `start` yields an empty view
    /// instead of panicking (matching [`FastStr::substring_from`]).
    pub fn substring(&self, start: usize, len: usize) -> FastStr<'a> {
        let total = self.data.len();
        let start = start.min(total);
        // `saturating_add` keeps `start + len` from wrapping for huge `len` values.
        let end = start.saturating_add(len).min(total);
        FastStr::new(&self.data[start..end])
    }

    /// Returns everything from `start` to the end; `start` is clamped to the view length.
    pub fn substring_from(&self, start: usize) -> FastStr<'a> {
        let start = start.min(self.data.len());
        FastStr::new(&self.data[start..])
    }

    /// Returns the first `len` bytes; `len` is clamped to the view length.
    pub fn prefix(&self, len: usize) -> FastStr<'a> {
        let len = len.min(self.data.len());
        FastStr::new(&self.data[..len])
    }

    /// Returns the last `len` bytes; `len` is clamped to the view length.
    pub fn suffix(&self, len: usize) -> FastStr<'a> {
        let total = self.data.len();
        let len = len.min(total);
        FastStr::new(&self.data[total - len..])
    }

    /// `true` when the view begins with the bytes of `prefix`.
    #[inline]
    pub fn starts_with(&self, prefix: FastStr) -> bool {
        self.data.starts_with(prefix.data)
    }

    /// `true` when the view ends with the bytes of `suffix`.
    #[inline]
    pub fn ends_with(&self, suffix: FastStr) -> bool {
        self.data.ends_with(suffix.data)
    }

    /// Index of the first occurrence of `byte`, if any.
    #[inline]
    pub fn find_byte(&self, byte: u8) -> Option<usize> {
        self.data.iter().position(|&b| b == byte)
    }

    /// Index of the first occurrence of `needle`, if any.
    ///
    /// An empty needle matches at offset 0. Single-byte needles are delegated to
    /// [`FastStr::find_byte_optimized`].
    pub fn find(&self, needle: FastStr) -> Option<usize> {
        if needle.is_empty() {
            return Some(0);
        }
        if needle.len() > self.len() {
            return None;
        }
        if needle.len() == 1 {
            return self.find_byte_optimized(needle.data[0]);
        }
        #[cfg(all(target_arch = "x86_64", feature = "avx512"))]
        {
            if needle.len() >= 4
                && self.len() >= 64
                && is_x86_feature_detected!("avx512f")
                && is_x86_feature_detected!("avx512bw")
            {
                return self.find_avx512(needle);
            }
        }
        // `needle` is non-empty here, so `windows` cannot be asked for zero-length windows.
        self.data
            .windows(needle.len())
            .position(|window| window == needle.data)
    }

    /// Index of the first occurrence of `byte`, using the AVX-512-gated path when it is compiled
    /// in, detected at run time, and the view is at least 64 bytes long.
    #[inline]
    pub fn find_byte_optimized(&self, byte: u8) -> Option<usize> {
        #[cfg(all(target_arch = "x86_64", feature = "avx512"))]
        {
            if self.len() >= 64
                && is_x86_feature_detected!("avx512f")
                && is_x86_feature_detected!("avx512bw")
            {
                // SAFETY: `avx512f`, the only feature the callee enables, was just detected.
                return unsafe { self.find_byte_avx512(byte) };
            }
        }
        self.find_byte(byte)
    }

    /// Safe wrapper around [`FastStr::find_avx512_impl`]; `find` calls it only after detecting
    /// AVX-512 support at run time.
    #[cfg(all(target_arch = "x86_64", feature = "avx512"))]
    fn find_avx512(&self, needle: FastStr) -> Option<usize> {
        // SAFETY: the caller (`find`) has verified `avx512f` is available.
        unsafe { self.find_avx512_impl(needle) }
    }

    /// Substring search compiled with `avx512f` enabled.
    ///
    /// The body uses no explicit intrinsics; it is a plain window scan that the compiler is free
    /// to vectorise with the enabled feature.
    #[cfg(all(target_arch = "x86_64", feature = "avx512"))]
    #[target_feature(enable = "avx512f")]
    unsafe fn find_avx512_impl(&self, needle: FastStr) -> Option<usize> {
        let pattern = needle.data;
        if pattern.is_empty() {
            return Some(0);
        }
        if pattern.len() > self.data.len() {
            return None;
        }
        self.data
            .windows(pattern.len())
            .position(|window| window == pattern)
    }

    /// Byte search compiled with `avx512f` enabled (no explicit intrinsics, see
    /// [`FastStr::find_avx512_impl`]).
    #[cfg(all(target_arch = "x86_64", feature = "avx512"))]
    #[target_feature(enable = "avx512f")]
    unsafe fn find_byte_avx512(&self, byte: u8) -> Option<usize> {
        self.data.iter().position(|&b| b == byte)
    }

    /// Length of the longest common prefix of `self` and `other`, in bytes.
    pub fn common_prefix_len(&self, other: FastStr) -> usize {
        self.data
            .iter()
            .zip(other.data)
            .take_while(|(left, right)| left == right)
            .count()
    }

    /// Lexicographic byte-wise comparison.
    #[inline]
    pub fn compare(&self, other: FastStr) -> Ordering {
        self.data.cmp(other.data)
    }

    /// Hashes the bytes, dispatching to the widest back end available at run time.
    ///
    /// Every back end consumes the input as little-endian 64-bit words in order, so they are
    /// intended to return the same value as [`FastStr::hash_fallback`].
    pub fn hash_fast(&self) -> u64 {
        #[cfg(target_arch = "x86_64")]
        {
            #[cfg(feature = "avx512")]
            {
                if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512bw") {
                    return self.hash_avx512();
                }
            }
            if is_x86_feature_detected!("avx2") {
                self.hash_avx2()
            } else if is_x86_feature_detected!("sse2") {
                self.hash_sse2()
            } else {
                self.hash_fallback()
            }
        }
        #[cfg(target_arch = "aarch64")]
        {
            if cfg!(feature = "simd") && std::arch::is_aarch64_feature_detected!("neon") {
                self.hash_neon()
            } else {
                self.hash_fallback()
            }
        }
        #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
        {
            self.hash_fallback()
        }
    }

    /// Initial hash state: a fixed constant offset by `31 * len` (wrapping).
    #[inline]
    fn hash_seed(len: usize) -> u64 {
        2134173u64.wrapping_add((len as u64).wrapping_mul(31))
    }

    /// Folds one 64-bit word into the running state `h` (add, then three multiply/xor-shift
    /// rounds).
    #[inline]
    fn mix_word(h: u64, word: u64) -> u64 {
        let mut h = h.wrapping_add(word);
        h = h.wrapping_mul(0x9e3779b97f4a7c15u64);
        h ^= h >> 30;
        h = h.wrapping_mul(0xbf58476d1ce4e5b9u64);
        h ^= h >> 27;
        h = h.wrapping_mul(0x94d049bb133111ebu64);
        h ^= h >> 31;
        h
    }

    /// Reads exactly eight bytes as a little-endian `u64`; `chunk` must have length 8.
    #[inline]
    fn read_u64_le(chunk: &[u8]) -> u64 {
        let mut word = [0u8; 8];
        word.copy_from_slice(chunk);
        u64::from_le_bytes(word)
    }

    /// Mixes every whole 8-byte word of `tail` into `h`, then passes the leftover (< 8 bytes)
    /// to [`FastStr::hash_remainder`].
    fn hash_tail(&self, tail: &[u8], mut h: u64) -> u64 {
        let words = tail.chunks_exact(8);
        let leftover = words.remainder();
        for word in words {
            h = Self::mix_word(h, Self::read_u64_le(word));
        }
        self.hash_remainder(leftover, h)
    }

    /// Hash back end that loads 32-byte blocks with AVX2 instructions.
    ///
    /// Callers in this file check `is_x86_feature_detected!("avx2")` before calling.
    #[cfg(target_arch = "x86_64")]
    fn hash_avx2(&self) -> u64 {
        let data = self.data;
        let mut h = Self::hash_seed(data.len());
        let blocks = data.chunks_exact(32);
        let tail = blocks.remainder();
        for block in blocks {
            let mut lanes = [0u64; 4];
            // SAFETY: `block` is exactly 32 readable bytes and `lanes` is 32 writable bytes; both
            // accesses use the unaligned load/store intrinsics.
            unsafe {
                let vector = _mm256_loadu_si256(block.as_ptr() as *const __m256i);
                _mm256_storeu_si256(lanes.as_mut_ptr() as *mut __m256i, vector);
            }
            for lane in lanes {
                h = Self::mix_word(h, lane);
            }
        }
        self.hash_tail(tail, h)
    }

    /// Hash back end that loads 16-byte blocks with SSE2 instructions.
    ///
    /// Callers in this file check `is_x86_feature_detected!("sse2")` before calling.
    #[cfg(target_arch = "x86_64")]
    fn hash_sse2(&self) -> u64 {
        let data = self.data;
        let mut h = Self::hash_seed(data.len());
        let blocks = data.chunks_exact(16);
        let tail = blocks.remainder();
        for block in blocks {
            let mut lanes = [0u64; 2];
            // SAFETY: `block` is exactly 16 readable bytes and `lanes` is 16 writable bytes; both
            // accesses use the unaligned load/store intrinsics.
            unsafe {
                let vector = _mm_loadu_si128(block.as_ptr() as *const __m128i);
                _mm_storeu_si128(lanes.as_mut_ptr() as *mut __m128i, vector);
            }
            for lane in lanes {
                h = Self::mix_word(h, lane);
            }
        }
        self.hash_tail(tail, h)
    }

    /// Safe wrapper around [`FastStr::hash_avx512_impl`]; `hash_fast` calls it only after
    /// detecting AVX-512 support at run time.
    #[cfg(all(target_arch = "x86_64", feature = "avx512"))]
    fn hash_avx512(&self) -> u64 {
        // SAFETY: the caller (`hash_fast`) has verified `avx512f` is available.
        unsafe { self.hash_avx512_impl() }
    }

    /// Hash back end that loads 64-byte blocks with AVX-512, then 32-byte blocks, then words.
    #[cfg(all(target_arch = "x86_64", feature = "avx512"))]
    #[target_feature(enable = "avx512f")]
    unsafe fn hash_avx512_impl(&self) -> u64 {
        use std::arch::x86_64::{__m512i, _mm512_loadu_si512, _mm512_storeu_si512};

        let data = self.data;
        let mut h = Self::hash_seed(data.len());

        let wide_blocks = data.chunks_exact(64);
        let after_wide = wide_blocks.remainder();
        for block in wide_blocks {
            let mut lanes = [0u64; 8];
            // SAFETY: `block` is exactly 64 readable bytes and `lanes` is 64 writable bytes; both
            // accesses use the unaligned load/store intrinsics.
            unsafe {
                let vector = _mm512_loadu_si512(block.as_ptr() as *const __m512i);
                _mm512_storeu_si512(lanes.as_mut_ptr() as *mut __m512i, vector);
            }
            for lane in lanes {
                h = Self::mix_word(h, lane);
            }
        }

        let half_blocks = after_wide.chunks_exact(32);
        let tail = half_blocks.remainder();
        for block in half_blocks {
            let mut lanes = [0u64; 4];
            // SAFETY: `block` is exactly 32 readable bytes and `lanes` is 32 writable bytes; both
            // accesses use the unaligned load/store intrinsics.
            unsafe {
                let vector = _mm256_loadu_si256(block.as_ptr() as *const __m256i);
                _mm256_storeu_si256(lanes.as_mut_ptr() as *mut __m256i, vector);
            }
            for lane in lanes {
                h = Self::mix_word(h, lane);
            }
        }

        self.hash_tail(tail, h)
    }

    /// Safe wrapper around [`FastStr::hash_neon_impl`]; `hash_fast` calls it only after
    /// detecting NEON support at run time.
    #[cfg(target_arch = "aarch64")]
    fn hash_neon(&self) -> u64 {
        // SAFETY: the caller (`hash_fast`) has verified `neon` is available.
        unsafe { self.hash_neon_impl() }
    }

    /// Hash back end that loads 16-byte blocks with NEON instructions.
    #[cfg(target_arch = "aarch64")]
    #[target_feature(enable = "neon")]
    unsafe fn hash_neon_impl(&self) -> u64 {
        let data = self.data;
        let mut h = Self::hash_seed(data.len());
        let blocks = data.chunks_exact(16);
        let tail = blocks.remainder();
        for block in blocks {
            let mut bytes = [0u8; 16];
            // SAFETY: `block` is exactly 16 readable bytes and `bytes` is 16 writable bytes.
            unsafe {
                let vector = vld1q_u8(block.as_ptr());
                vst1q_u8(bytes.as_mut_ptr(), vector);
            }
            let (low, high) = bytes.split_at(8);
            for half in [low, high] {
                h = Self::mix_word(h, Self::read_u64_le(half));
            }
        }
        self.hash_tail(tail, h)
    }

    /// Portable hash back end: whole little-endian words first, then the trailing bytes.
    fn hash_fallback(&self) -> u64 {
        self.hash_tail(self.data, Self::hash_seed(self.data.len()))
    }

    /// Folds `remainder` into `h` and applies the final scrambling rounds.
    ///
    /// An empty `remainder` returns `h` untouched (no final rounds). Whole 8-byte words are
    /// mixed like the main loop; leftover bytes are mixed one at a time.
    fn hash_remainder(&self, remainder: &[u8], mut h: u64) -> u64 {
        if remainder.is_empty() {
            return h;
        }
        let words = remainder.chunks_exact(8);
        let stragglers = words.remainder();
        for word in words {
            h = Self::mix_word(h, Self::read_u64_le(word));
        }
        for &byte in stragglers {
            h = h.wrapping_add(u64::from(byte));
            h = h.wrapping_mul(0x9e3779b97f4a7c15u64);
            h ^= h >> 17;
        }
        // Final xor-shift / multiply rounds.
        h ^= h >> 33;
        h = h.wrapping_mul(0xff51afd7ed558ccdu64);
        h ^= h >> 33;
        h = h.wrapping_mul(0xc4ceb9fe1a85ec53u64);
        h ^= h >> 33;
        h
    }

    /// Returns an iterator over the pieces of the view separated by `delimiter`.
    pub fn split(&self, delimiter: u8) -> SplitIter<'a> {
        SplitIter {
            remainder: *self,
            delimiter,
        }
    }

    /// Copies the bytes into an owned `String`, replacing invalid UTF-8 sequences with U+FFFD.
    pub fn into_string(&self) -> String {
        String::from_utf8_lossy(self.data).into_owned()
    }

    /// Lossy UTF-8 conversion that borrows when the bytes are already valid UTF-8.
    pub fn to_cow_str(&self) -> Cow<'a, str> {
        String::from_utf8_lossy(self.data)
    }
}
/// Debug output: `FastStr("…")` for valid UTF-8, otherwise `FastStr([b0, b1, …])`.
impl<'a> fmt::Debug for FastStr<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if let Some(text) = self.as_str() {
            return write!(f, "FastStr({:?})", text);
        }
        // Not UTF-8: fall back to the raw byte list.
        write!(f, "FastStr({:?})", self.data)
    }
}
/// Display output: the text itself for valid UTF-8, otherwise the byte list (`[b0, b1, …]`).
impl<'a> fmt::Display for FastStr<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if let Some(text) = self.as_str() {
            return write!(f, "{}", text);
        }
        // Not UTF-8: print the raw bytes in debug form.
        write!(f, "{:?}", self.data)
    }
}
/// Partial ordering; always `Some`, delegating to the total order in `Ord`.
impl<'a> PartialOrd for FastStr<'a> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
/// Total order: lexicographic comparison of the underlying bytes.
impl<'a> Ord for FastStr<'a> {
    fn cmp(&self, other: &Self) -> Ordering {
        let rhs = *other;
        self.compare(rhs)
    }
}
/// Feeds the hasher a single `u64`: the value of `hash_fast` over the bytes.
impl<'a> Hash for FastStr<'a> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        let digest = self.hash_fast();
        state.write_u64(digest);
    }
}
impl<'a> From<&'a str> for FastStr<'a> {
fn from(s: &'a str) -> Self {
Self::from_string(s)
}
}
impl<'a> From<&'a [u8]> for FastStr<'a> {
fn from(bytes: &'a [u8]) -> Self {
Self::new(bytes)
}
}
impl<'a> AsRef<[u8]> for FastStr<'a> {
fn as_ref(&self) -> &[u8] {
self.data
}
}
/// Byte-wise equality against an unsized `str`.
impl<'a> PartialEq<str> for FastStr<'a> {
    fn eq(&self, other: &str) -> bool {
        let rhs = other.as_bytes();
        self.data == rhs
    }
}
/// Byte-wise equality against a `&str`, so `fast_str == "literal"` works.
impl<'a> PartialEq<&str> for FastStr<'a> {
    fn eq(&self, other: &&str) -> bool {
        let rhs: &str = other;
        self.data == rhs.as_bytes()
    }
}
/// Byte-wise equality against an owned `String`.
impl<'a> PartialEq<String> for FastStr<'a> {
    fn eq(&self, other: &String) -> bool {
        let rhs = other.as_bytes();
        self.data == rhs
    }
}
/// Byte-wise equality against an unsized byte slice.
impl<'a> PartialEq<[u8]> for FastStr<'a> {
    fn eq(&self, other: &[u8]) -> bool {
        let lhs: &[u8] = self.data;
        lhs == other
    }
}
/// Byte-wise equality against a `&[u8]`.
impl<'a> PartialEq<&[u8]> for FastStr<'a> {
    fn eq(&self, other: &&[u8]) -> bool {
        let rhs: &[u8] = other;
        self.data == rhs
    }
}
/// Iterator over the pieces of a [`FastStr`] separated by a single delimiter byte.
///
/// Iteration stops as soon as the unconsumed input is empty, so an empty input yields nothing
/// and a trailing delimiter does not produce a final empty piece.
pub struct SplitIter<'a> {
    /// Input that has not been handed out yet.
    remainder: FastStr<'a>,
    /// Byte that separates pieces.
    delimiter: u8,
}

impl<'a> Iterator for SplitIter<'a> {
    type Item = FastStr<'a>;

    fn next(&mut self) -> Option<Self::Item> {
        let pending = self.remainder;
        if pending.is_empty() {
            return None;
        }
        let (piece, rest) = match pending.find_byte(self.delimiter) {
            // Hand out everything before the delimiter and skip the delimiter itself.
            Some(at) => (pending.prefix(at), pending.substring_from(at + 1)),
            // No delimiter left: the whole remainder is the last piece.
            None => (pending, FastStr::new(&[])),
        };
        self.remainder = rest;
        Some(piece)
    }
}
/// Stateless [`std::hash::BuildHasher`] that produces [`FastStrHasher`]s.
///
/// It is `Default`, `Clone` and `Copy` so that it can be used with `HashMap::default()` /
/// `HashSet::default()` and so that maps using it can be cloned.
#[derive(Clone, Copy, Debug, Default)]
pub struct FastStrHash;

impl FastStrHash {
    /// One-shot helper: the `hash_fast` value of `s`.
    #[inline]
    // Kept as a public convenience even when nothing in the crate calls it.
    #[allow(dead_code)]
    pub fn hash(s: FastStr) -> u64 {
        s.hash_fast()
    }
}

impl std::hash::BuildHasher for FastStrHash {
    type Hasher = FastStrHasher;

    /// Returns a fresh hasher with an all-zero state.
    fn build_hasher(&self) -> Self::Hasher {
        FastStrHasher::new()
    }
}
/// Streaming [`Hasher`] created by [`FastStrHash`].
///
/// Each write is reduced to a 64-bit word (`hash_fast` of the bytes, or the integer itself for
/// `write_u64`) and folded into the running state, so every write of a multi-part key
/// contributes to the result. A hasher that receives exactly one write returns that word
/// unchanged.
#[derive(Clone, Debug, Default)]
pub struct FastStrHasher {
    /// Running state; zero until the first write.
    hash: u64,
}

impl FastStrHasher {
    /// Odd multiplier used to scramble the previous state before the next word is folded in.
    const FOLD_MULTIPLIER: u64 = 0x9e37_79b9_7f4a_7c15;

    /// Creates a hasher with an all-zero state.
    fn new() -> Self {
        Self { hash: 0 }
    }

    /// Folds `word` into the state.
    ///
    /// Starting from the zero state this leaves `word` itself, which keeps single-write results
    /// identical to a plain assignment.
    #[inline]
    fn absorb(&mut self, word: u64) {
        self.hash = self.hash.rotate_left(5).wrapping_mul(Self::FOLD_MULTIPLIER) ^ word;
    }
}

impl Hasher for FastStrHasher {
    fn finish(&self) -> u64 {
        self.hash
    }

    /// Hashes `bytes` with `hash_fast` and folds the result into the state.
    fn write(&mut self, bytes: &[u8]) {
        self.absorb(FastStr::new(bytes).hash_fast());
    }

    /// Folds `i` into the state as-is (it is typically already a hash, see `Hash for FastStr`).
    fn write_u64(&mut self, i: u64) {
        self.absorb(i);
    }
}
// Unit tests for `FastStr`, `SplitIter` and the hasher types defined in this file.
#[cfg(test)]
mod tests {
use super::*;
// Length, emptiness and UTF-8 round trip of a simple string.
#[test]
fn test_basic_operations() {
let s = FastStr::from_string("hello world");
assert_eq!(s.len(), 11);
assert!(!s.is_empty());
assert_eq!(s.as_str().unwrap(), "hello world");
}
// In-range `substring`, `prefix` and `suffix`.
#[test]
fn test_substring() {
let s = FastStr::from_string("hello world");
assert_eq!(s.substring(0, 5).as_str().unwrap(), "hello");
assert_eq!(s.substring(6, 5).as_str().unwrap(), "world");
assert_eq!(s.prefix(5).as_str().unwrap(), "hello");
assert_eq!(s.suffix(5).as_str().unwrap(), "world");
}
// Positive and negative cases for `starts_with` / `ends_with`.
#[test]
fn test_starts_ends_with() {
let s = FastStr::from_string("hello world");
assert!(s.starts_with(FastStr::from_string("hello")));
assert!(s.ends_with(FastStr::from_string("world")));
assert!(!s.starts_with(FastStr::from_string("world")));
assert!(!s.ends_with(FastStr::from_string("hello")));
}
// Substring and single-byte search, hit and miss.
#[test]
fn test_find() {
let s = FastStr::from_string("hello world");
assert_eq!(s.find(FastStr::from_string("world")), Some(6));
assert_eq!(s.find(FastStr::from_string("xyz")), None);
assert_eq!(s.find_byte(b'o'), Some(4));
assert_eq!(s.find_byte(b'z'), None);
}
// "hello" and "help" share the three bytes "hel".
#[test]
fn test_common_prefix() {
let s1 = FastStr::from_string("hello");
let s2 = FastStr::from_string("help");
assert_eq!(s1.common_prefix_len(s2), 3);
}
// Splitting on a delimiter with no empty fields.
#[test]
fn test_split() {
let s = FastStr::from_string("a,b,c");
let parts: Vec<_> = s.split(b',').collect();
assert_eq!(parts.len(), 3);
assert_eq!(parts[0].as_str().unwrap(), "a");
assert_eq!(parts[1].as_str().unwrap(), "b");
assert_eq!(parts[2].as_str().unwrap(), "c");
}
// Comparison operators follow byte order.
#[test]
fn test_comparison() {
let s1 = FastStr::from_string("abc");
let s2 = FastStr::from_string("abd");
let s3 = FastStr::from_string("abc");
assert!(s1 < s2);
assert!(s1 == s3);
assert!(s2 > s1);
}
// Equal inputs hash equally; these two different inputs hash differently.
#[test]
fn test_hash() {
let s1 = FastStr::from_string("test");
let s2 = FastStr::from_string("test");
let s3 = FastStr::from_string("different");
assert_eq!(s1.hash_fast(), s2.hash_fast());
assert_ne!(s1.hash_fast(), s3.hash_fast());
}
// Cross-type equality with `&str`, `String` and `&[u8]`.
#[test]
fn test_equality_with_string_types() {
let fs = FastStr::from_string("test");
assert_eq!(fs, "test");
assert_eq!(fs, String::from("test"));
assert_eq!(fs, b"test".as_slice());
}
// Unchecked accessors on input that satisfies their preconditions.
#[test]
fn test_unsafe_operations() {
let s = FastStr::from_string("hello world");
unsafe {
assert_eq!(s.get_byte_unchecked(0), b'h');
assert_eq!(s.get_byte_unchecked(6), b'w');
let raw_str = s.as_str_unchecked();
assert_eq!(raw_str, "hello world");
}
}
// Checked byte access (including one past the end) and pointer identity.
#[test]
fn test_byte_operations() {
let s = FastStr::from_string("hello");
assert_eq!(s.get_byte(0), Some(b'h'));
assert_eq!(s.get_byte(4), Some(b'o'));
assert_eq!(s.get_byte(5), None);
assert_eq!(s.as_ptr(), s.as_bytes().as_ptr());
}
// `substring_from` and `suffix` clamp out-of-range arguments instead of panicking.
#[test]
fn test_substring_operations() {
let s = FastStr::from_string("hello world");
let from_6 = s.substring_from(6);
assert_eq!(from_6.as_str().unwrap(), "world");
let from_20 = s.substring_from(20); assert_eq!(from_20.len(), 0);
let suffix_5 = s.suffix(5);
assert_eq!(suffix_5.as_str().unwrap(), "world");
let suffix_20 = s.suffix(20); assert_eq!(suffix_20.as_str().unwrap(), "hello world");
}
// Owned / `Cow` conversions, including lossy replacement of invalid UTF-8.
#[test]
fn test_string_conversions() {
let s = FastStr::from_string("hello world");
let owned = s.into_string();
assert_eq!(owned, "hello world");
let cow = s.to_cow_str();
assert_eq!(cow, "hello world");
let invalid_bytes = &[0xFF, 0xFE, 0xFD];
let s_invalid = FastStr::new(invalid_bytes);
assert!(s_invalid.as_str().is_none());
let cow_invalid = s_invalid.to_cow_str();
assert!(cow_invalid.contains('�')); }
// Round trip through a raw pointer and length.
#[test]
fn test_from_raw_parts() {
let data = b"test data";
let s = unsafe { FastStr::from_raw_parts(data.as_ptr(), data.len()) };
assert_eq!(s.as_str().unwrap(), "test data");
assert_eq!(s.len(), 9);
}
// `Display` / `Debug` output for valid and invalid UTF-8.
#[test]
fn test_display_and_debug() {
let s = FastStr::from_string("hello");
let display = format!("{}", s);
assert_eq!(display, "hello");
let debug = format!("{:?}", s);
assert!(debug.contains("FastStr"));
assert!(debug.contains("hello"));
let invalid = FastStr::new(&[0xFF, 0xFE]);
let debug_invalid = format!("{:?}", invalid);
assert!(debug_invalid.contains("FastStr"));
let display_invalid = format!("{}", invalid);
assert!(display_invalid.contains("255") || display_invalid.contains("�"));
}
// All six comparison operators plus the explicit `compare` method.
#[test]
fn test_ordering() {
let s1 = FastStr::from_string("abc");
let s2 = FastStr::from_string("abd");
let s3 = FastStr::from_string("abc");
assert!(s1 < s2);
assert!(s1 <= s2);
assert!(s1 <= s3);
assert!(s2 > s1);
assert!(s2 >= s1);
assert!(s1 >= s3);
use std::cmp::Ordering;
assert_eq!(s1.compare(s2), Ordering::Less);
assert_eq!(s2.compare(s1), Ordering::Greater);
assert_eq!(s1.compare(s3), Ordering::Equal);
}
// `hash_fast` and the `Hash` impl agree for equal inputs.
#[test]
fn test_hash_consistency() {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let s1 = FastStr::from_string("test string");
let s2 = FastStr::from_string("test string");
let s3 = FastStr::from_string("different");
assert_eq!(s1.hash_fast(), s2.hash_fast());
assert_ne!(s1.hash_fast(), s3.hash_fast());
let mut hasher1 = DefaultHasher::new();
let mut hasher2 = DefaultHasher::new();
s1.hash(&mut hasher1);
s2.hash(&mut hasher2);
assert_eq!(hasher1.finish(), hasher2.finish());
}
// Every query on the empty string returns the "nothing found" answer.
#[test]
fn test_empty_string() {
let empty = FastStr::from_string("");
assert!(empty.is_empty());
assert_eq!(empty.len(), 0);
assert_eq!(empty.as_str().unwrap(), "");
assert_eq!(empty.find_byte(b'a'), None);
assert_eq!(empty.find(FastStr::from_string("a")), None);
assert_eq!(empty.common_prefix_len(FastStr::from_string("abc")), 0);
}
// `find` with an empty needle, an over-long needle, matches at both ends, and a 1-byte needle.
#[test]
fn test_find_edge_cases() {
let s = FastStr::from_string("abcdefg");
assert_eq!(s.find(FastStr::from_string("")), Some(0));
assert_eq!(s.find(FastStr::from_string("abcdefghijk")), None);
assert_eq!(s.find(FastStr::from_string("abc")), Some(0));
assert_eq!(s.find(FastStr::from_string("efg")), Some(4));
assert_eq!(s.find(FastStr::from_string("d")), Some(3));
}
// Interior empty fields are kept; a trailing delimiter adds no final empty field; empty input
// yields no items.
#[test]
fn test_split_edge_cases() {
let s = FastStr::from_string("a,,b,c,");
let parts: Vec<_> = s.split(b',').collect();
assert_eq!(parts.len(), 4); assert_eq!(parts[0].as_str().unwrap(), "a");
assert_eq!(parts[1].as_str().unwrap(), "");
assert_eq!(parts[2].as_str().unwrap(), "b");
assert_eq!(parts[3].as_str().unwrap(), "c");
let empty = FastStr::from_string("");
let empty_parts: Vec<_> = empty.split(b',').collect();
assert_eq!(empty_parts.len(), 0);
let no_delim = FastStr::from_string("abcdef");
let no_delim_parts: Vec<_> = no_delim.split(b',').collect();
assert_eq!(no_delim_parts.len(), 1);
assert_eq!(no_delim_parts[0].as_str().unwrap(), "abcdef");
}
// Equality and inequality against each supported right-hand type.
#[test]
fn test_equality_edge_cases() {
let s = FastStr::from_string("test");
assert_eq!(s, "test");
assert_ne!(s, "different");
assert_eq!(s, String::from("test"));
assert_ne!(s, String::from("different"));
assert_eq!(s, b"test"[..]);
assert_ne!(s, b"different"[..]);
let bytes: &[u8] = b"test";
assert_eq!(s, bytes);
}
// `AsRef<[u8]>` exposes the underlying bytes.
#[test]
fn test_as_ref() {
let s = FastStr::from_string("test");
let bytes: &[u8] = s.as_ref();
assert_eq!(bytes, b"test");
}
// `From<&str>` and `From<&[u8]>` conversions.
#[test]
fn test_from_implementations() {
let from_str: FastStr = "test".into();
assert_eq!(from_str.as_str().unwrap(), "test");
let bytes: &[u8] = b"test";
let from_bytes: FastStr = bytes.into();
assert_eq!(from_bytes.as_bytes(), b"test");
}
// `FastStrHash` / `FastStrHasher`: one write per hasher, plus the static `hash` helper.
// NOTE(review): the `use` below hard-codes the module path `crate::string::fast_str`; the
// glob import of `super::*` at the top of this module appears to cover the same names.
#[test]
fn test_fast_str_hasher() {
use crate::string::fast_str::{FastStrHash, FastStrHasher};
use std::hash::{BuildHasher, Hasher};
let build_hasher = FastStrHash;
let mut hasher = build_hasher.build_hasher();
hasher.write(b"test");
let hash1 = hasher.finish();
let mut hasher2 = FastStrHasher::new();
hasher2.write(b"test");
let hash2 = hasher2.finish();
assert_eq!(hash1, hash2);
let mut hasher3 = FastStrHasher::new();
hasher3.write_u64(12345);
assert_eq!(hasher3.finish(), 12345);
let s = FastStr::from_string("test");
let static_hash = FastStrHash::hash(s);
assert_eq!(static_hash, s.hash_fast());
}
// Every SIMD back end available on the running CPU must return the same value as the
// portable fallback, for a range of lengths.
#[test]
fn test_simd_hash_consistency() {
let static_strings = [
"",
"a",
"hello",
"hello world",
"The quick brown fox jumps over the lazy dog",
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
];
for &test_str in &static_strings {
let fs = FastStr::from_string(test_str);
let fallback_hash = fs.hash_fallback();
#[cfg(target_arch = "x86_64")]
{
if is_x86_feature_detected!("sse2") {
let sse2_hash = fs.hash_sse2();
assert_eq!(
fallback_hash,
sse2_hash,
"SSE2 hash mismatch for string: '{}' (len={})",
test_str,
test_str.len()
);
}
if is_x86_feature_detected!("avx2") {
let avx2_hash = fs.hash_avx2();
assert_eq!(
fallback_hash,
avx2_hash,
"AVX2 hash mismatch for string: '{}' (len={})",
test_str,
test_str.len()
);
}
}
let fast_hash = fs.hash_fast();
assert_eq!(
fallback_hash,
fast_hash,
"hash_fast mismatch for string: '{}' (len={})",
test_str,
test_str.len()
);
}
// Lengths at and around the 8/16/32/64-byte block sizes used by the back ends.
let sizes = [8, 16, 32, 33, 64, 100];
for size in sizes {
let test_str = "a".repeat(size);
let fs = FastStr::from_string(&test_str);
let fallback_hash = fs.hash_fallback();
let fast_hash = fs.hash_fast();
assert_eq!(
fallback_hash, fast_hash,
"hash_fast mismatch for string of size {}",
size
);
}
}
// 64-byte inputs with distinctive bit patterns: fast path equals fallback and is repeatable.
#[test]
fn test_simd_hash_performance_data() {
let test_cases = vec![
vec![0u8; 64],
vec![0xFFu8; 64],
(0..64)
.map(|i| if i % 2 == 0 { 0xAAu8 } else { 0x55u8 })
.collect::<Vec<_>>(),
(0..64).map(|i| (i % 256) as u8).collect::<Vec<_>>(),
[0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xF0].repeat(8),
];
for test_data in test_cases {
let fs = FastStr::new(&test_data);
let fallback = fs.hash_fallback();
let fast = fs.hash_fast();
assert_eq!(fallback, fast, "Hash mismatch for data pattern");
let fast2 = fs.hash_fast();
assert_eq!(fast, fast2, "Hash should be deterministic");
}
}
// `hash_remainder`: empty input returns the state untouched; non-empty input changes it
// deterministically.
#[test]
fn test_hash_remainder_function() {
let fs = FastStr::from_string("test");
let base_hash = 12345u64;
let empty_result = fs.hash_remainder(&[], base_hash);
assert_eq!(empty_result, base_hash);
let small_remainder = b"abc";
let small_result = fs.hash_remainder(small_remainder, base_hash);
assert_ne!(small_result, base_hash);
let eight_bytes = b"12345678";
let eight_result = fs.hash_remainder(eight_bytes, base_hash);
assert_ne!(eight_result, base_hash);
let large_remainder = b"123456789012345";
let large_result = fs.hash_remainder(large_remainder, base_hash);
assert_ne!(large_result, base_hash);
let repeat_result = fs.hash_remainder(small_remainder, base_hash);
assert_eq!(small_result, repeat_result);
}
// Changing the case of one input letter must flip at least 12 of the 64 output bits.
#[test]
fn test_hash_avalanche_effect() {
let base = "hello world";
let base_fs = FastStr::from_string(base);
let base_hash = base_fs.hash_fast();
let modified = "heLlo world";
let modified_fs = FastStr::from_string(modified);
let modified_hash = modified_fs.hash_fast();
assert_ne!(base_hash, modified_hash);
let xor_result = base_hash ^ modified_hash;
let different_bits = xor_result.count_ones();
assert!(
different_bits >= 12,
"Poor avalanche effect: only {} bits different",
different_bits
);
}
// 100 strings that differ only in a two-digit suffix must all hash to distinct values.
#[test]
fn test_hash_distribution() {
let base = "test_string_";
let mut hashes = std::collections::HashSet::new();
for i in 0..100 {
let test_str = format!("{}{:02}", base, i);
let fs = FastStr::from_string(&test_str);
let hash = fs.hash_fast();
assert!(
hashes.insert(hash),
"Duplicate hash found for: {}",
test_str
);
}
assert_eq!(hashes.len(), 100);
}
// Lengths on either side of the 16- and 32-byte block sizes, a large input, and embedded NULs.
#[test]
fn test_hash_edge_cases() {
for size in [15, 16, 17, 31, 32, 33] {
let test_data = "x".repeat(size);
let fs = FastStr::from_string(&test_data);
let hash1 = fs.hash_fast();
let hash2 = fs.hash_fast();
assert_eq!(
hash1, hash2,
"Hash should be deterministic for size {}",
size
);
let fallback = fs.hash_fallback();
assert_eq!(
hash1, fallback,
"SIMD hash should match fallback for size {}",
size
);
}
let large_string = "A".repeat(1000);
let large_fs = FastStr::from_string(&large_string);
let large_hash1 = large_fs.hash_fast();
let large_hash2 = large_fs.hash_fast();
assert_eq!(large_hash1, large_hash2);
let null_string = "hello\0world\0test";
let null_fs = FastStr::from_string(null_string);
let null_hash = null_fs.hash_fast();
let null_fallback = null_fs.hash_fallback();
assert_eq!(null_hash, null_fallback);
}
}