#[ cfg( all( feature = "simd", feature = "std" ) ) ]
use aho_corasick ::AhoCorasick;
#[ cfg( all( feature = "simd", feature = "std" ) ) ]
use std ::collections ::HashMap;
#[ cfg( all( feature = "simd", feature = "std" ) ) ]
use std ::sync :: { Arc, RwLock };
#[ cfg( feature = "std" ) ]
use std ::borrow ::Cow;
#[ cfg( all( feature = "use_alloc", not( feature = "std" ) ) ) ]
use alloc ::borrow ::Cow;
use super :: { Split, SplitType };
/// Iterator that splits a string slice on any of several delimiters using an
/// Aho-Corasick automaton.
///
/// Its `Iterator` implementation yields `Split` items for both the text between
/// delimiters and the delimiters themselves.
///
/// Only compiled when both the `simd` and `std` features are enabled.
#[ cfg( all( feature = "simd", feature = "std" ) ) ]
#[ derive( Debug ) ]
pub struct SIMDSplitIterator< 'a >
{
/// The complete string being split; every yielded item borrows from it.
input: &'a str,
/// Compiled multi-pattern matcher, reference-counted so it can be shared with the cache.
patterns: Arc< AhoCorasick >,
/// Byte offset into `input` at which the next search starts.
position: usize,
/// Owned copies of the delimiters handed to the constructor.
/// Stored but not read by the code visible in this file, hence `allow( dead_code )`.
#[ allow( dead_code ) ] delimiter_patterns: Vec< String >,
/// Set by `next` whenever a match is found: `true` if the item just yielded was a
/// delimiter, `false` if it was the text preceding one. Not read in the visible code.
last_was_delimiter: bool,
/// Becomes `true` once the end of `input` has been reached; `next` then returns `None`.
finished: bool,
}
#[ cfg( all( feature = "simd", feature = "std" ) ) ]
impl< 'a > SIMDSplitIterator< 'a >
{
  /// Builds an iterator over `input` that splits on any of `delimiters`.
  ///
  /// Empty delimiters are discarded before the automaton is compiled. Matching is
  /// case-sensitive and uses leftmost-first semantics, so when several delimiters
  /// match at the same position the one listed first wins.
  ///
  /// # Errors
  ///
  /// Returns the `aho_corasick ::BuildError` produced by the automaton builder when
  /// the pattern set cannot be compiled.
  pub fn new( input: &'a str, delimiters: &[ &str ] ) -> Result< Self, aho_corasick ::BuildError >
  {
    let non_empty: Vec< &str > = delimiters
    .iter()
    .copied()
    .filter( | candidate | !candidate.is_empty() )
    .collect();

    let automaton = AhoCorasick ::builder()
    .ascii_case_insensitive( false )
    .match_kind( aho_corasick ::MatchKind ::LeftmostFirst )
    .build( &non_empty )?;

    let owned_delimiters: Vec< String > = non_empty
    .into_iter()
    .map( String ::from )
    .collect();

    // Field initialisation lives in one place: the cached-pattern constructor.
    Ok( Self ::from_cached_patterns( input, Arc ::new( automaton ), owned_delimiters ) )
  }

  /// Builds an iterator over `input` from an already compiled automaton.
  ///
  /// `delimiter_patterns` is stored as given; it is not validated against `patterns`.
  /// Iteration starts at byte offset 0.
  #[ must_use ]
  pub fn from_cached_patterns
  (
    input: &'a str,
    patterns: Arc< AhoCorasick >,
    delimiter_patterns: Vec< String >
  ) -> Self
  {
    Self
    {
      input,
      patterns,
      delimiter_patterns,
      position: 0,
      last_was_delimiter: false,
      finished: false,
    }
  }
}
#[ cfg( all( feature = "simd", feature = "std" ) ) ]
impl< 'a > Iterator for SIMDSplitIterator< 'a >
{
  type Item = Split< 'a >;

  /// Yields the next piece of the input.
  ///
  /// Each call searches for the next delimiter starting at the current position:
  ///
  /// - if a delimiter is found further ahead, the text before it is yielded as
  ///   `SplitType ::Delimited` and the position moves to the start of the delimiter;
  /// - if a delimiter is found right at the current position, it is yielded as
  ///   `SplitType ::Delimiter` and the position moves past it;
  /// - if no delimiter is found, the rest of the input is yielded as
  ///   `SplitType ::Delimited` and the iterator is marked finished.
  ///
  /// Empty segments are never yielded: adjacent delimiters produce consecutive
  /// delimiter items, and an input ending in a delimiter ends with that delimiter.
  /// `start` / `end` are byte offsets into the original input and `was_quoted` is
  /// always `false`.
  fn next( &mut self ) -> Option< Self ::Item >
  {
    // Copy the reference out so slices carry the full `'a` lifetime.
    let haystack: &'a str = self.input;
    let total = haystack.len();

    if self.finished || self.position > total
    {
      return None;
    }
    if self.position == total
    {
      self.finished = true;
      return None;
    }

    // From here on `begin < total`, so every slice below is non-empty.
    let begin = self.position;

    let Some( found ) = self.patterns.find( &haystack[ begin.. ] ) else
    {
      // No further delimiter: the remainder is the final segment.
      self.position = total;
      self.finished = true;
      return Some( Split
      {
        string: Cow ::Borrowed( &haystack[ begin.. ] ),
        typ: SplitType ::Delimited,
        start: begin,
        end: total,
        was_quoted: false,
      } );
    };

    // Match offsets are relative to `begin`. A match at offset 0 is the delimiter
    // itself; otherwise the text in front of it is emitted first and the delimiter
    // is picked up by the following call.
    let at_delimiter = found.start() == 0;
    let stop = begin + if at_delimiter { found.end() } else { found.start() };
    let typ = if at_delimiter { SplitType ::Delimiter } else { SplitType ::Delimited };

    self.position = stop;
    self.last_was_delimiter = at_delimiter;

    Some( Split
    {
      string: Cow ::Borrowed( &haystack[ begin..stop ] ),
      typ,
      start: begin,
      end: stop,
      was_quoted: false,
    } )
  }
}
#[ cfg( all( feature = "simd", feature = "std" ) ) ]
use std ::sync ::LazyLock;
/// Process-wide cache of compiled automata, keyed by the list of delimiter strings
/// each one was built from.
///
/// Created lazily as an empty map on first access and guarded by an `RwLock`.
/// `get_or_create_cached_patterns` is the reader/writer visible in this file.
#[ cfg( all( feature = "simd", feature = "std" ) ) ]
static PATTERN_CACHE: LazyLock< RwLock<HashMap<Vec< String >, Arc< AhoCorasick >>>> =
LazyLock ::new(|| RwLock ::new(HashMap ::new()));
/// Returns a shared, compiled automaton for `delimiters`, building and caching it
/// on first use.
///
/// Empty delimiters are dropped; the remaining strings, in order, form the cache key.
/// The automaton is case-sensitive and uses leftmost-first match semantics.
///
/// The cache is bounded: once it holds `MAX_CACHED_PATTERN_SETS` entries it is
/// emptied completely before the new entry is stored.
///
/// A poisoned cache lock is recovered rather than propagated as a panic. The map is
/// only ever touched by `get`, `clear` and `insert`, so it cannot be observed in a
/// half-updated state.
///
/// # Errors
///
/// Returns the `aho_corasick ::BuildError` produced by the automaton builder when
/// the pattern set cannot be compiled. Nothing is cached in that case.
#[ cfg( all( feature = "simd", feature = "std" ) ) ]
pub fn get_or_create_cached_patterns( delimiters: &[ &str ] ) -> Result< Arc< AhoCorasick >, aho_corasick ::BuildError >
{
  /// Upper bound on the number of distinct delimiter sets kept in the cache.
  const MAX_CACHED_PATTERN_SETS: usize = 64;

  let delimiter_key: Vec< String > = delimiters
  .iter()
  .filter( | &d | !d.is_empty() )
  .map( | s | ( *s ).to_string() )
  .collect();

  // Fast path: shared read lock only, released before any compilation starts.
  {
    let cache = PATTERN_CACHE.read().unwrap_or_else( std ::sync ::PoisonError ::into_inner );
    if let Some( patterns ) = cache.get( &delimiter_key )
    {
      return Ok( Arc ::clone( patterns ) );
    }
  }

  // Compile outside of any lock so other threads are not blocked meanwhile.
  let patterns = AhoCorasick ::builder()
  .ascii_case_insensitive( false )
  .match_kind( aho_corasick ::MatchKind ::LeftmostFirst )
  .build( &delimiter_key )?;
  let patterns_arc = Arc ::new( patterns );

  let mut cache = PATTERN_CACHE.write().unwrap_or_else( std ::sync ::PoisonError ::into_inner );

  // Another thread may have stored the same key while we were compiling; reuse its
  // automaton so callers share one instance and no needless eviction is triggered.
  if let Some( existing ) = cache.get( &delimiter_key )
  {
    return Ok( Arc ::clone( existing ) );
  }

  if cache.len() >= MAX_CACHED_PATTERN_SETS
  {
    cache.clear();
  }
  cache.insert( delimiter_key, Arc ::clone( &patterns_arc ) );

  Ok( patterns_arc )
}
/// Creates a [`SIMDSplitIterator`] over `input`, reusing a cached automaton for
/// `delimiters` when one exists.
///
/// Empty delimiters are ignored. The iterator also receives its own owned copy of
/// the non-empty delimiters.
///
/// # Errors
///
/// Propagates the `aho_corasick ::BuildError` from `get_or_create_cached_patterns`.
#[ cfg( all( feature = "simd", feature = "std" ) ) ]
pub fn simd_split_cached< 'a >( input: &'a str, delimiters: &[ &str ] ) -> Result< SIMDSplitIterator<'a >, aho_corasick ::BuildError >
{
  let automaton = get_or_create_cached_patterns( delimiters )?;

  let owned_delimiters: Vec< String > = delimiters
  .iter()
  .copied()
  .filter( | candidate | !candidate.is_empty() )
  .map( String ::from )
  .collect();

  Ok( SIMDSplitIterator ::from_cached_patterns( input, automaton, owned_delimiters ) )
}
/// Placeholder iterator type used when the `simd` and `std` features are not both
/// enabled.
///
/// It stores no data; the marker only ties the type to the `'a` lifetime so that
/// signatures match the feature-enabled variant. The marker is taken from `core`
/// rather than `std` because this configuration is the one compiled when `std` is
/// unavailable.
#[ cfg( not( all( feature = "simd", feature = "std" ) ) ) ]
#[ derive( Debug ) ]
pub struct SIMDSplitIterator< 'a >( core ::marker ::PhantomData< &'a str > );
#[ cfg( not( all( feature = "simd", feature = "std" ) ) ) ]
impl< 'a > SIMDSplitIterator< 'a >
{
  /// Fallback constructor for builds without both the `simd` and `std` features.
  ///
  /// Both arguments are ignored.
  ///
  /// # Errors
  ///
  /// Always returns `Err( "SIMD feature not enabled" )`.
  pub fn new( _input: &'a str, _delimiters: &[ &str ] ) -> Result< Self, &'static str >
  {
    let reason: &'static str = "SIMD feature not enabled";
    Err( reason )
  }
}
/// Fallback `Iterator` implementation for builds without both the `simd` and `std`
/// features: the iterator is always exhausted.
#[ cfg( not( all( feature = "simd", feature = "std" ) ) ) ]
impl< 'a > Iterator for SIMDSplitIterator< 'a >
{
  type Item = Split< 'a >;

  /// Never yields an item.
  fn next( &mut self ) -> Option< Self ::Item >
  {
    Option ::None
  }
}
/// Fallback for builds without both the `simd` and `std` features.
///
/// Both arguments are ignored.
///
/// # Errors
///
/// Always returns `Err( "SIMD feature not enabled" )`.
#[ cfg( not( all( feature = "simd", feature = "std" ) ) ) ]
pub fn simd_split_cached< 'a >( _input: &'a str, _delimiters: &[ &str ] ) -> Result< SIMDSplitIterator<'a >, &'static str >
{
  let reason: &'static str = "SIMD feature not enabled";
  Err( reason )
}