#![ doc( html_logo_url = "https://raw.githubusercontent.com/Wandalen/wTools/master/asset/img/logo_v3_trans_square.png" ) ]
#![ doc( html_favicon_url = "https://raw.githubusercontent.com/Wandalen/wTools/alpha/asset/img/logo_v3_trans_square_icon_small_v2.ico" ) ]
#[ cfg( any( feature = "optimize_split", feature = "optimize_match" ) ) ]
use macro_tools::
{
quote::quote,
syn::{ self, Expr, LitStr, Result },
};
#[ cfg( any( feature = "optimize_split", feature = "optimize_match" ) ) ]
use proc_macro::TokenStream;
/// Procedural macro `optimize_split!`: expands to an expression that splits a string.
///
/// Arguments, in order: the source expression, then either one string literal or a bracketed
/// list of string literals naming the delimiters, then any of the optional
/// `preserve_delimiters = <bool>`, `preserve_empty = <bool>` and the bare flag `debug`.
///
/// Malformed input is turned into a compile error at the call site.
#[ cfg( feature = "optimize_split" ) ]
#[ proc_macro ]
pub fn optimize_split( input: TokenStream ) -> TokenStream
{
  optimize_split_impl( input )
  .unwrap_or_else( | err | err.to_compile_error() )
  .into()
}
/// Procedural macro `optimize_match!`: expands to an expression that searches a string for
/// one of several patterns and evaluates to an `Option` holding a byte position.
///
/// Arguments, in order: the source expression, then either one string literal or a bracketed
/// list of string literals naming the patterns, then any of the optional
/// `strategy = "<name>"` and the bare flag `debug`.
///
/// Malformed input is turned into a compile error at the call site.
#[ cfg( feature = "optimize_match" ) ]
#[ proc_macro ]
pub fn optimize_match( input: TokenStream ) -> TokenStream
{
  optimize_match_impl( input )
  .unwrap_or_else( | err | err.to_compile_error() )
  .into()
}
/// Parses the raw `optimize_split!` tokens and renders the splitting code for them.
///
/// # Errors
///
/// Returns the parse error when the tokens do not form a valid `OptimizeSplitInput`.
#[ cfg( feature = "optimize_split" ) ]
fn optimize_split_impl( input: TokenStream ) -> Result< macro_tools::proc_macro2::TokenStream >
{
  syn::parse::< OptimizeSplitInput >( input )
  .map( | parsed | generate_optimized_split( &parsed ) )
}
/// Parses the raw `optimize_match!` tokens and renders the matching code for them.
///
/// # Errors
///
/// Returns the parse error when the tokens do not form a valid `OptimizeMatchInput`.
#[ cfg( feature = "optimize_match" ) ]
fn optimize_match_impl( input: TokenStream ) -> Result< macro_tools::proc_macro2::TokenStream >
{
  syn::parse::< OptimizeMatchInput >( input )
  .map( | parsed | generate_optimized_match( &parsed ) )
}
/// Parsed arguments of one `optimize_split!` invocation.
#[ cfg( feature = "optimize_split" ) ]
#[ derive( Debug ) ]
// The three flags are independent options of the macro, so they stay plain bools.
#[ allow( clippy::struct_excessive_bools ) ]
struct OptimizeSplitInput
{
/// Expression whose value is split; spliced verbatim into the generated code.
source: Expr,
/// Delimiter strings in the order they were written in the invocation.
delimiters: Vec< String >,
/// When `true`, each matched delimiter is also pushed into the result.
preserve_delimiters: bool,
/// When `true`, empty segments are kept in the result.
preserve_empty: bool,
/// When `true`, the chosen optimization is printed to stderr while the macro expands.
debug: bool,
}
#[ cfg( feature = "optimize_split" ) ]
impl syn::parse::Parse for OptimizeSplitInput
{
  /// Parses `source, delimiters [, preserve_delimiters = <bool>] [, preserve_empty = <bool>] [, debug]`.
  ///
  /// `delimiters` is either one string literal or a bracketed, comma-separated list of string
  /// literals. Options may appear in any order, and a trailing comma is accepted.
  ///
  /// # Errors
  ///
  /// Fails when the tokens do not follow the grammar above, when an option name is not
  /// recognised, when a delimiter is the empty string, or when the delimiter list is empty.
  fn parse( input: syn::parse::ParseStream<'_> ) -> Result< Self >
  {
    // An empty delimiter matches at every position without consuming any input, so the
    // generated splitting loops would never advance. Refuse it here, at expansion time.
    fn delimiter_value( lit: &LitStr ) -> Result< String >
    {
      let value = lit.value();
      if value.is_empty()
      {
        return Err( syn::Error::new( lit.span(), "Delimiter must not be empty" ) );
      }
      Ok( value )
    }

    let source: Expr = input.parse()?;
    input.parse::< syn::Token![,] >()?;

    let mut delimiters = Vec::new();
    if input.peek( syn::token::Bracket )
    {
      // Remember where the list starts so an empty list can be reported at the right place.
      let list_span = input.span();
      let content;
      syn::bracketed!( content in input );
      while !content.is_empty()
      {
        let lit: LitStr = content.parse()?;
        delimiters.push( delimiter_value( &lit )? );
        if !content.is_empty()
        {
          content.parse::< syn::Token![,] >()?;
        }
      }
      // `[]` would expand to an empty array literal with no element type to infer.
      if delimiters.is_empty()
      {
        return Err( syn::Error::new( list_span, "Delimiter list must not be empty" ) );
      }
    }
    else
    {
      let lit: LitStr = input.parse()?;
      delimiters.push( delimiter_value( &lit )? );
    }

    let mut preserve_delimiters = false;
    let mut preserve_empty = false;
    let mut debug = false;
    while !input.is_empty()
    {
      input.parse::< syn::Token![,] >()?;
      // Tolerate a trailing comma, as the bracketed list already does.
      if input.is_empty()
      {
        break;
      }
      let ident: syn::Ident = input.parse()?;
      // The name is checked before `=` is required, so an unknown name is always reported
      // as such rather than as a missing `=`.
      match ident.to_string().as_str()
      {
        "debug" => debug = true,
        "preserve_delimiters" =>
        {
          input.parse::< syn::Token![=] >()?;
          preserve_delimiters = input.parse::< syn::LitBool >()?.value;
        },
        "preserve_empty" =>
        {
          input.parse::< syn::Token![=] >()?;
          preserve_empty = input.parse::< syn::LitBool >()?.value;
        },
        _ => return Err( syn::Error::new( ident.span(), "Unknown parameter" ) ),
      }
    }

    Ok( OptimizeSplitInput
    {
      source,
      delimiters,
      preserve_delimiters,
      preserve_empty,
      debug,
    } )
  }
}
/// Parsed arguments of one `optimize_match!` invocation.
#[ cfg( feature = "optimize_match" ) ]
#[ derive( Debug ) ]
struct OptimizeMatchInput
{
  /// Expression that is searched; spliced verbatim into the generated code.
  source: Expr,
  /// Pattern strings in the order they were written in the invocation.
  patterns: Vec< String >,
  /// Value given as `strategy = "..."`; the parser starts from `"first_match"`.
  strategy: String,
  /// When `true`, the chosen optimization is printed to stderr while the macro expands.
  debug: bool,
}
#[ cfg( feature = "optimize_match" ) ]
impl syn::parse::Parse for OptimizeMatchInput
{
  /// Parses `source, patterns [, strategy = "<name>"] [, debug]`.
  ///
  /// `patterns` is either one string literal or a bracketed, comma-separated list of string
  /// literals. Options may appear in any order, and a trailing comma is accepted.
  /// `strategy` starts as `"first_match"` when it is not given.
  ///
  /// # Errors
  ///
  /// Fails when the tokens do not follow the grammar above, when an option name is not
  /// recognised, or when the pattern list is empty.
  fn parse( input: syn::parse::ParseStream<'_> ) -> Result< Self >
  {
    let source: Expr = input.parse()?;
    input.parse::< syn::Token![,] >()?;

    let mut patterns = Vec::new();
    if input.peek( syn::token::Bracket )
    {
      // Remember where the list starts so an empty list can be reported at the right place.
      let list_span = input.span();
      let content;
      syn::bracketed!( content in input );
      while !content.is_empty()
      {
        let lit: LitStr = content.parse()?;
        patterns.push( lit.value() );
        if !content.is_empty()
        {
          content.parse::< syn::Token![,] >()?;
        }
      }
      // `[]` would expand to an empty array literal with no element type to infer.
      if patterns.is_empty()
      {
        return Err( syn::Error::new( list_span, "Pattern list must not be empty" ) );
      }
    }
    else
    {
      let lit: LitStr = input.parse()?;
      patterns.push( lit.value() );
    }

    let mut strategy = "first_match".to_string();
    let mut debug = false;
    while !input.is_empty()
    {
      input.parse::< syn::Token![,] >()?;
      // Tolerate a trailing comma, as the bracketed list already does.
      if input.is_empty()
      {
        break;
      }
      let ident: syn::Ident = input.parse()?;
      match ident.to_string().as_str()
      {
        "debug" => debug = true,
        "strategy" =>
        {
          input.parse::< syn::Token![=] >()?;
          strategy = input.parse::< LitStr >()?.value();
        },
        _ => return Err( syn::Error::new( ident.span(), "Unknown parameter" ) ),
      }
    }

    Ok( OptimizeMatchInput
    {
      source,
      patterns,
      strategy,
      debug,
    } )
  }
}
/// Selects and runs the code generator that fits the delimiter set of `input`.
///
/// The delimiters are classified by `analyze_split_pattern`. When `input.debug` is set, the
/// delimiters and the resulting classification are written to stderr during expansion.
#[ cfg( feature = "optimize_split" ) ]
fn generate_optimized_split( input: &OptimizeSplitInput ) -> macro_tools::proc_macro2::TokenStream
{
  let optimization = analyze_split_pattern( &input.delimiters );

  if input.debug
  {
    eprintln!( "optimize_split! debug: pattern={:?}, optimization={optimization:?}", input.delimiters );
  }

  match &optimization
  {
    SplitOptimization::ComplexPattern => generate_complex_pattern_split( input ),
    SplitOptimization::MultipleCharDelimiters => generate_multi_delimiter_split( input ),
    SplitOptimization::SingleCharDelimiter( single ) => generate_single_char_split( input, single ),
  }
}
/// Generates splitting code for a delimiter that consists of a single character.
///
/// The result of the generated expression is a `Vec` of string slices of the source. Empty
/// segments are kept only when `preserve_empty` is set, and the delimiter itself is pushed
/// after each segment only when `preserve_delimiters` is set.
///
/// # Panics
///
/// Panics during macro expansion if `delim` is empty. The dispatcher only passes delimiters
/// whose byte length is exactly 1.
#[ cfg( feature = "optimize_split" ) ]
fn generate_single_char_split( input: &OptimizeSplitInput, delim: &str ) -> macro_tools::proc_macro2::TokenStream
{
  let source = &input.source;
  let preserve_delimiters = input.preserve_delimiters;
  let preserve_empty = input.preserve_empty;
  let delim_char = delim
  .chars()
  .next()
  .expect( "single-character delimiter must contain a character" );

  if !preserve_delimiters && !preserve_empty
  {
    // Fast path: nothing is preserved, so `str::split` is enough. Empty segments are
    // filtered out because `preserve_empty` is false here, which matches what the other
    // generators produce for the same options.
    return quote!
    {
      {
        let src = #source;
        src.split( #delim ).filter( | segment | !segment.is_empty() ).collect::< Vec< &str > >()
      }
    };
  }

  quote!
  {
    {
      let src = #source;
      let delim = #delim_char;
      let mut result = Vec::new();
      let mut start = 0;
      for ( i, ch ) in src.char_indices()
      {
        if ch == delim
        {
          // Use the character's encoded width instead of assuming one byte.
          let end = i + ch.len_utf8();
          let segment = &src[ start..i ];
          if #preserve_empty || !segment.is_empty()
          {
            result.push( segment );
          }
          if #preserve_delimiters
          {
            result.push( &src[ i..end ] );
          }
          start = end;
        }
      }
      let final_segment = &src[ start.. ];
      if #preserve_empty || !final_segment.is_empty()
      {
        result.push( final_segment );
      }
      result
    }
  }
}
/// Generates splitting code that tests every delimiter at each position of the source.
///
/// The generated expression walks the source from left to right. At each position the
/// delimiters are tried in the order they were written and the first one that matches wins.
/// The result is a `Vec` of string slices; empty segments are kept only when
/// `preserve_empty` is set and matched delimiters are pushed only when
/// `preserve_delimiters` is set.
///
/// Empty delimiters are dropped before code generation: they match everywhere without
/// consuming input and would keep the scan from advancing.
#[ cfg( feature = "optimize_split" ) ]
fn generate_multi_delimiter_split( input: &OptimizeSplitInput ) -> macro_tools::proc_macro2::TokenStream
{
  let source = &input.source;
  let preserve_delimiters = input.preserve_delimiters;
  let preserve_empty = input.preserve_empty;
  let delim_array = input
  .delimiters
  .iter()
  .map( String::as_str )
  .filter( | delim | !delim.is_empty() )
  .collect::< Vec< _ > >();
  // The explicit array length keeps the generated array typed even when it is empty.
  let delim_count = delim_array.len();

  quote!
  {
    {
      let src = #source;
      let delimiters: [ &str; #delim_count ] = [ #( #delim_array ),* ];
      let mut result = Vec::new();
      let mut start = 0;
      let mut i = 0;
      while i < src.len()
      {
        let rest = &src[ i.. ];
        let found_delimiter = delimiters
        .iter()
        .copied()
        .find( | candidate | rest.starts_with( *candidate ) );
        if let Some( delim ) = found_delimiter
        {
          let segment = &src[ start..i ];
          if #preserve_empty || !segment.is_empty()
          {
            result.push( segment );
          }
          if #preserve_delimiters
          {
            result.push( delim );
          }
          start = i + delim.len();
          i = start;
        }
        else
        {
          // Step over one whole character so that `i` stays on a UTF-8 boundary;
          // slicing at a non-boundary index would panic.
          i += rest.chars().next().map_or( 1, char::len_utf8 );
        }
      }
      let final_segment = &src[ start.. ];
      if #preserve_empty || !final_segment.is_empty()
      {
        result.push( final_segment );
      }
      result
    }
  }
}
/// Generates splitting code that repeatedly searches the unprocessed tail of the source for
/// the nearest delimiter.
///
/// In every round each delimiter is looked up with `str::find` and the occurrence with the
/// smallest position is taken. When several delimiters start at that position, the one
/// written first wins. The result is a `Vec` of string slices; empty segments are kept only
/// when `preserve_empty` is set and matched delimiters are pushed only when
/// `preserve_delimiters` is set.
///
/// Empty delimiters are dropped before code generation: they are found at position 0
/// without consuming input and would keep the loop from terminating.
#[ cfg( feature = "optimize_split" ) ]
fn generate_complex_pattern_split( input: &OptimizeSplitInput ) -> macro_tools::proc_macro2::TokenStream
{
  let source = &input.source;
  let preserve_delimiters = input.preserve_delimiters;
  let preserve_empty = input.preserve_empty;
  let delim_array = input
  .delimiters
  .iter()
  .map( String::as_str )
  .filter( | delim | !delim.is_empty() )
  .collect::< Vec< _ > >();
  // The explicit array length keeps the generated array typed even when it is empty.
  let delim_count = delim_array.len();

  quote!
  {
    {
      let src = #source;
      let delimiters: [ &str; #delim_count ] = [ #( #delim_array ),* ];
      let mut result = Vec::new();
      let mut remaining: &str = src;
      loop
      {
        // `min_by_key` returns the first of several equal minima, so on a tie the
        // delimiter that was listed first is chosen.
        let nearest = delimiters
        .iter()
        .copied()
        .filter_map( | delim | remaining.find( delim ).map( | pos | ( pos, delim ) ) )
        .min_by_key( | &( pos, _ ) | pos );
        match nearest
        {
          Some( ( pos, best_delim ) ) =>
          {
            let segment = &remaining[ ..pos ];
            if #preserve_empty || !segment.is_empty()
            {
              result.push( segment );
            }
            if #preserve_delimiters
            {
              result.push( best_delim );
            }
            remaining = &remaining[ pos + best_delim.len().. ];
          },
          None =>
          {
            if #preserve_empty || !remaining.is_empty()
            {
              result.push( remaining );
            }
            break;
          },
        }
      }
      result
    }
  }
}
/// Generates the search code for a parsed `optimize_match!` invocation.
///
/// The generated expression evaluates to the `Option` returned by `str::find`, chosen
/// according to the classification from `analyze_match_pattern`:
///
/// - `SinglePattern`: the position of the only pattern.
/// - `TrieBasedMatch`: the smallest position at which any pattern is found.
/// - `SequentialMatch`: the position of the first pattern, in written order, that occurs
///   anywhere in the source.
///
/// NOTE(review): the last two arms can return different positions for the same input.
/// That existing behaviour is kept as is; confirm whether it is intended.
///
/// The source expression is bound to a local reference exactly once. This evaluates it a
/// single time instead of once per pattern, and stops expressions such as `&s[ 1.. ]` from
/// being re-associated with the appended `.find( .. )` call.
///
/// When `input.debug` is set, the patterns, strategy and classification are written to
/// stderr during expansion.
#[ cfg( feature = "optimize_match" ) ]
fn generate_optimized_match( input: &OptimizeMatchInput ) -> macro_tools::proc_macro2::TokenStream
{
  let source = &input.source;
  let patterns = &input.patterns;
  let strategy = &input.strategy;
  let optimization = analyze_match_pattern( patterns, strategy );
  if input.debug
  {
    eprintln!( "optimize_match! debug: patterns={patterns:?}, strategy={strategy:?}, optimization={optimization:?}" );
  }

  // The explicit array length keeps the generated array typed even when it is empty.
  let pattern_count = patterns.len();

  match optimization
  {
    MatchOptimization::SinglePattern( pattern ) =>
    {
      quote!
      {
        {
          let src = &( #source );
          src.find( #pattern )
        }
      }
    },
    MatchOptimization::TrieBasedMatch =>
    {
      // The byte tables are built but not yet spliced into the output; the call is kept
      // so the helper stays referenced.
      let _trie_data = build_compile_time_trie( patterns );
      quote!
      {
        {
          let src = &( #source );
          let candidates: [ &str; #pattern_count ] = [ #( #patterns ),* ];
          candidates
          .iter()
          .filter_map( | pattern | src.find( *pattern ) )
          .min()
        }
      }
    },
    MatchOptimization::SequentialMatch =>
    {
      quote!
      {
        {
          let src = &( #source );
          let candidates: [ &str; #pattern_count ] = [ #( #patterns ),* ];
          candidates
          .iter()
          .find_map( | pattern | src.find( *pattern ) )
        }
      }
    },
  }
}
/// Code generation strategy chosen for an `optimize_split!` delimiter set.
#[ cfg( feature = "optimize_split" ) ]
#[ derive( Debug ) ]
enum SplitOptimization
{
/// Exactly one delimiter whose byte length is 1; carries that delimiter.
SingleCharDelimiter( String ),
/// One longer delimiter, or at most 8 delimiters of at most 4 bytes each.
MultipleCharDelimiters,
/// Every other delimiter set.
ComplexPattern,
}
/// Code generation strategy chosen for an `optimize_match!` pattern set.
#[ cfg( feature = "optimize_match" ) ]
#[ derive( Debug ) ]
enum MatchOptimization
{
/// Exactly one pattern; carries that pattern.
SinglePattern( String ),
/// At most 16 patterns of at most 8 bytes each (and not exactly one pattern).
TrieBasedMatch,
/// Every other pattern set.
SequentialMatch,
}
/// Classifies a delimiter set to pick the code generator used for it.
///
/// - one delimiter of byte length 1 gives `SingleCharDelimiter`;
/// - one longer (or empty) delimiter gives `MultipleCharDelimiters`;
/// - otherwise, at most 8 delimiters of at most 4 bytes each give `MultipleCharDelimiters`;
/// - anything else gives `ComplexPattern`.
///
/// All lengths are byte lengths (`String::len`).
#[ cfg( feature = "optimize_split" ) ]
fn analyze_split_pattern( delimiters: &[ String ] ) -> SplitOptimization
{
  match delimiters
  {
    [ only ] if only.len() == 1 => SplitOptimization::SingleCharDelimiter( only.clone() ),
    [ _ ] => SplitOptimization::MultipleCharDelimiters,
    set if set.len() <= 8 && set.iter().all( | delim | delim.len() <= 4 ) =>
    {
      SplitOptimization::MultipleCharDelimiters
    },
    _ => SplitOptimization::ComplexPattern,
  }
}
/// Classifies a pattern set to pick the code generator used for it.
///
/// - exactly one pattern gives `SinglePattern`;
/// - otherwise, at most 16 patterns of at most 8 bytes each give `TrieBasedMatch`;
/// - anything else gives `SequentialMatch`.
///
/// `_strategy` is accepted but does not influence the result.
#[ cfg( feature = "optimize_match" ) ]
fn analyze_match_pattern( patterns: &[ String ], _strategy: &str ) -> MatchOptimization
{
  match patterns
  {
    [ only ] => MatchOptimization::SinglePattern( only.clone() ),
    set if set.len() <= 16 && set.iter().all( | pattern | pattern.len() <= 8 ) =>
    {
      MatchOptimization::TrieBasedMatch
    },
    _ => MatchOptimization::SequentialMatch,
  }
}
/// Renders every pattern as a token stream of the form `&[ b0, b1, .. ]` holding its UTF-8
/// bytes.
///
/// Despite the name, no trie structure is assembled here; the output is one byte-slice
/// expression per pattern, in input order.
#[ cfg( feature = "optimize_match" ) ]
fn build_compile_time_trie( patterns: &[ String ] ) -> Vec< macro_tools::proc_macro2::TokenStream >
{
  let mut tables = Vec::with_capacity( patterns.len() );
  for pattern in patterns
  {
    let bytes: Vec< u8 > = pattern.as_bytes().to_vec();
    tables.push( quote! { &[ #( #bytes ),* ] } );
  }
  tables
}