#[ cfg( feature = "std" ) ]
extern crate std;
#[ cfg( all( feature = "use_alloc", not( feature = "std" ) ) ) ]
extern crate alloc;
#[ cfg( feature = "std" ) ]
use std ::string ::String;
#[ cfg( all( feature = "use_alloc", not( feature = "std" ) ) ) ]
use alloc ::string ::String;
#[ cfg( feature = "simd" ) ]
use memchr :: { memchr, memmem };
#[ cfg( feature = "simd" ) ]
use aho_corasick ::AhoCorasick;
#[ cfg( feature = "simd" ) ]
use bytecount;
#[ cfg( all( feature = "string_split", feature = "std" ) ) ]
pub use crate ::string ::split :: { SIMDSplitIterator, simd_split_cached };
/// Namespace type grouping substring, multi-pattern, character and byte searches over `&str`.
///
/// Every operation has two compile-time variants selected by the `simd` feature:
/// an accelerated one backed by `memchr` / `aho_corasick` / `bytecount`, and a
/// fallback that only uses `str` methods. All returned positions are byte offsets
/// into the haystack, not character indices.
#[ derive( Debug ) ]
pub struct SimdStringSearch;

impl SimdStringSearch
{
  /// Returns the byte offset of the first occurrence of `needle` in `haystack`,
  /// or `None` when it does not occur.
  ///
  /// `simd` variant: delegates to `memmem ::find` over the raw bytes.
  #[ cfg( feature = "simd" ) ]
  #[ must_use ]
  pub fn find( haystack: &str, needle: &str ) -> Option< usize >
  {
    memmem ::find( haystack.as_bytes(), needle.as_bytes() )
  }

  /// Returns the byte offset of the first occurrence of `needle` in `haystack`,
  /// or `None` when it does not occur.
  ///
  /// Fallback variant: delegates to `str ::find`.
  #[ cfg( not( feature = "simd" ) ) ]
  #[ must_use ]
  pub fn find( haystack: &str, needle: &str ) -> Option< usize >
  {
    haystack.find( needle )
  }

  /// Finds the leftmost match of any of `needles` in `haystack`.
  ///
  /// Returns `( byte_offset, index_into_needles )`. When several needles match at
  /// the same offset, the one listed first in `needles` wins. Returns `None` when
  /// nothing matches or when the automaton cannot be built.
  ///
  /// `simd` variant: the automaton is built with `MatchKind ::LeftmostFirst`.
  /// The default (`Standard`) reports whichever match *ends* first, which is not
  /// necessarily the leftmost one and would disagree with the fallback variant.
  /// Note that the automaton is rebuilt on every call.
  #[ cfg( feature = "simd" ) ]
  #[ must_use ]
  pub fn find_any( haystack: &str, needles: &[ &str ] ) -> Option< ( usize, usize ) >
  {
    let ac = AhoCorasick ::builder()
      .match_kind( aho_corasick ::MatchKind ::LeftmostFirst )
      .build( needles )
      .ok()?;
    ac.find( haystack ).map( |m| ( m.start(), m.pattern().as_usize() ) )
  }

  /// Finds the leftmost match of any of `needles` in `haystack`.
  ///
  /// Returns `( byte_offset, index_into_needles )`. When several needles match at
  /// the same offset, the one listed first in `needles` wins. Returns `None` when
  /// nothing matches.
  ///
  /// Fallback variant: searches each needle separately with `str ::find` and keeps
  /// the smallest `( offset, index )` pair. No sentinel offset is used, so a match
  /// at offset `0` in an empty haystack (an empty needle) is reported correctly.
  #[ cfg( not( feature = "simd" ) ) ]
  #[ must_use ]
  pub fn find_any( haystack: &str, needles: &[ &str ] ) -> Option< ( usize, usize ) >
  {
    needles
      .iter()
      .enumerate()
      .filter_map( |( idx, needle )| haystack.find( *needle ).map( |pos| ( pos, idx ) ) )
      // Lexicographic minimum: smallest offset first, then smallest needle index.
      .min()
  }

  /// Counts how many times the character `ch` occurs in `s`.
  ///
  /// `simd` variant: ASCII characters are counted as raw bytes with
  /// `bytecount ::count`; this is exact because in UTF-8 an ASCII byte never
  /// appears inside a multi-byte sequence. Other characters are counted by
  /// iterating `chars()`.
  #[ cfg( feature = "simd" ) ]
  #[ must_use ]
  pub fn count_char( s: &str, ch: char ) -> usize
  {
    if ch.is_ascii()
    {
      // Lossless cast: `is_ascii` guarantees the code point fits in one byte.
      bytecount ::count( s.as_bytes(), ch as u8 )
    }
    else
    {
      s.chars().filter( |&c| c == ch ).count()
    }
  }

  /// Counts how many times the character `ch` occurs in `s`.
  ///
  /// Fallback variant: iterates `chars()` and counts equal characters.
  #[ cfg( not( feature = "simd" ) ) ]
  #[ must_use ]
  pub fn count_char( s: &str, ch: char ) -> usize
  {
    s.chars().filter( |&c| c == ch ).count()
  }

  /// Returns the offset of the first byte of `haystack` equal to `byte`.
  ///
  /// The comparison is on raw UTF-8 bytes, so for `byte >= 0x80` the returned
  /// offset may fall inside a multi-byte character.
  ///
  /// `simd` variant: delegates to `memchr`.
  #[ cfg( feature = "simd" ) ]
  #[ must_use ]
  pub fn find_byte( haystack: &str, byte: u8 ) -> Option< usize >
  {
    memchr( byte, haystack.as_bytes() )
  }

  /// Returns the offset of the first byte of `haystack` equal to `byte`.
  ///
  /// The comparison is on raw UTF-8 bytes, so for `byte >= 0x80` the returned
  /// offset may fall inside a multi-byte character.
  ///
  /// Fallback variant: linear scan over `bytes()`.
  #[ cfg( not( feature = "simd" ) ) ]
  #[ must_use ]
  pub fn find_byte( haystack: &str, byte: u8 ) -> Option< usize >
  {
    haystack.bytes().position( |b| b == byte )
  }
}
/// Extension trait exposing the [`SimdStringSearch`] operations as methods.
///
/// This file implements it for `str` and `String`; the per-method notes below
/// describe what those implementations do.
pub trait SimdStringExt
{
/// Splits `self` on any of `delimiters`, yielding a `SIMDSplitIterator` that borrows from `self`.
///
/// Only declared when both the `string_split` and `std` features are enabled.
/// Failures are reported as a human-readable `String`; the in-file `str`
/// implementation always returns an error when the `simd` feature is off.
#[ cfg( all( feature = "string_split", feature = "std" ) ) ]
fn simd_split( &self, delimiters: &[ &str ] ) -> Result< SIMDSplitIterator<'_ >, String >;
/// Returns the byte offset of the first occurrence of `needle`, or `None` if absent.
fn simd_find( &self, needle: &str ) -> Option< usize >;
/// Returns how many times the character `ch` occurs.
fn simd_count( &self, ch: char ) -> usize;
/// Searches for any of `needles`; returns `( byte_offset, index_into_needles )` of a match, or `None`.
fn simd_find_any( &self, needles: &[ &str ] ) -> Option< ( usize, usize ) >;
/// Returns the offset of the first raw byte equal to `byte`, or `None` if absent.
fn simd_find_byte( &self, byte: u8 ) -> Option< usize >;
}
/// String-slice implementation: every method forwards to the matching
/// `SimdStringSearch` function, except `simd_split`, which wraps `simd_split_cached`.
impl SimdStringExt for str
{
  /// Splits `self` on `delimiters` via `simd_split_cached`.
  ///
  /// With the `simd` feature the underlying error, if any, is rendered with its
  /// `Debug` form into the returned message. Without it the call always fails,
  /// because no SIMD splitter is compiled in.
  #[ cfg( all( feature = "string_split", feature = "std" ) ) ]
  fn simd_split( &self, delimiters: &[ &str ] ) -> Result< SIMDSplitIterator<'_ >, String >
  {
    #[ cfg( feature = "simd" ) ]
    {
      match simd_split_cached( self, delimiters )
      {
        Ok( pieces ) => Ok( pieces ),
        Err( e ) => Err( format!( "SIMD split failed: {e:?}" ) ),
      }
    }
    #[ cfg( not( feature = "simd" ) ) ]
    {
      Err( String ::from( "SIMD feature not enabled" ) )
    }
  }

  /// Forwards to `SimdStringSearch ::find`.
  fn simd_find( &self, needle: &str ) -> Option< usize >
  {
    SimdStringSearch ::find( self, needle )
  }

  /// Forwards to `SimdStringSearch ::count_char`.
  fn simd_count( &self, ch: char ) -> usize
  {
    SimdStringSearch ::count_char( self, ch )
  }

  /// Forwards to `SimdStringSearch ::find_any`.
  fn simd_find_any( &self, needles: &[ &str ] ) -> Option< ( usize, usize ) >
  {
    SimdStringSearch ::find_any( self, needles )
  }

  /// Forwards to `SimdStringSearch ::find_byte`.
  fn simd_find_byte( &self, byte: u8 ) -> Option< usize >
  {
    SimdStringSearch ::find_byte( self, byte )
  }
}
impl SimdStringExt for String
{
#[ cfg( all( feature = "string_split", feature = "std" ) ) ]
fn simd_split( &self, delimiters: &[ &str ] ) -> Result< SIMDSplitIterator<'_ >, String >
{
self.as_str().simd_split( delimiters )
}
fn simd_find( &self, needle: &str ) -> Option< usize >
{
self.as_str().simd_find( needle )
}
fn simd_count( &self, ch: char ) -> usize
{
self.as_str().simd_count( ch )
}
fn simd_find_any( &self, needles: &[ &str ] ) -> Option< ( usize, usize ) >
{
self.as_str().simd_find_any( needles )
}
fn simd_find_byte( &self, byte: u8 ) -> Option< usize >
{
self.as_str().simd_find_byte( byte )
}
}
/// Helpers for querying SIMD support and its expected benefit.
pub mod utils
{
  /// Reports whether this build was compiled with the `simd` feature.
  ///
  /// The answer is fixed at compile time; no CPU capability detection is done here.
  #[ must_use ]
  pub fn simd_available() -> bool
  {
    cfg!( feature = "simd" )
  }

  /// Returns a hard-coded speedup multiplier for a search over `input_size`
  /// units of input with `pattern_count` patterns.
  ///
  /// Without SIMD support the result is always `1.0`. Otherwise the value comes
  /// from this fixed table (nothing is measured at run time):
  ///
  /// | `input_size`    | `pattern_count` | result |
  /// |-----------------|-----------------|--------|
  /// | `0..=100`       | any             | `1.2`  |
  /// | `101..=1000`    | `1`             | `2.5`  |
  /// | `101..=1000`    | `2..=5`         | `3.5`  |
  /// | `101..=1000`    | anything else   | `4.0`  |
  /// | `1001..=10000`  | `1`             | `3.5`  |
  /// | `1001..=10000`  | anything else   | `6.0`  |
  /// | above `10000`   | `2..=5`         | `6.0`  |
  /// | above `10000`   | anything else   | `7.0`  |
  #[ must_use ]
  pub fn estimated_simd_speedup( input_size: usize, pattern_count: usize ) -> f32
  {
    if simd_available()
    {
      // Pick the size bucket first, then refine by the number of patterns.
      match input_size
      {
        0..=100 => 1.2,
        101..=1000 => match pattern_count
        {
          1 => 2.5,
          2..=5 => 3.5,
          _ => 4.0,
        },
        1001..=10000 => match pattern_count
        {
          1 => 3.5,
          _ => 6.0,
        },
        _ => match pattern_count
        {
          2..=5 => 6.0,
          _ => 7.0,
        },
      }
    }
    else
    {
      1.0
    }
  }
}