rsonpath/classification/
simd.rs

1//! SIMD configuration and runtime dispatch.
2//!
3//! The core of our approach is the two macros: [`config_simd`] and [`dispatch_simd`].
4//!
5//! ## What?
6//!
7//! We need to strike a delicate balance between portable code and compiler optimizations.
8//! All SIMD code should be maximally inlined. To that end we need `target_feature` annotated
9//! functions, so that SIMD intrinsics are actually emitted, but these are hard barriers for
10//! inlining if called from non-`target_feature` functions.
11//!
12//! The ideal would be to have a single `target_feature`-annotated entry point and then force
13//! the compiler to inline everything there. This isn't that easy, because you cannot *really*
14//! force the compiler to inline everything, and even if you could that can lead to inefficient compilation
15//! (large functions are harder to optimize, large code size negatively impacts caching, etc.).
16//!
17//! On the other end of portability there's runtime checking of CPU capabilities.
18//! That introduces an overhead and cannot be used everywhere, so in any case it'd have to be added
19//! in specific places that then call `target_feature`-annotated functions.
20//!
21//! The [`multiversion`](https://calebzulawski.github.io/rust-simd-book/3.3-multiversion.html) crate
22//! provides a near-ideal tradeoff, where one can annotate a function such that multiple definitions of it
23//! are expanded with different `target_feature` sets, and an efficient, cached runtime check is performed
24//! at entry to that function.
25//!
26//! For our crate we can do slightly better. The idea is to do the entire configuration of SIMD once and
27//! upfront ([`configure`] producing [`SimdConfiguration`]) and save it. Then we can use [`config_simd`]
28//! to create a [`Simd`] implementation that encapsulates all the relevant information in its type arguments.
29//! An entry point function can take a generic `V: Simd` parameter so that the compiler specializes it
30//! for all supported CPU capability configurations. Finally, to get the correct `target_feature` annotation
31//! we do a similar thing to `multiversion`, but using a constant value from the `V` type, which allows
32//! the compiler to optimize the check away when monomorphizing the function.
33//!
34//! An example idiomatic usage would be:
35//!
36//! ```rust,ignore
37//! fn entry() -> Result<(), EngineError> {
38//!   let configuration = simd::configure();
39//!   config_simd!(configuration => |simd| {
40//!       run(simd)
41//!   })
42//! }
43//!
44//! fn run<V: Simd>(simd: V) -> Result<(), EngineError> {
//!   dispatch_simd!(simd; simd =>
//!     fn<V: Simd>(simd: V) -> Result<(), EngineError>
//!     {
//!       // Actual implementation using SIMD capabilities from `simd`.
//!     }
//!   )
51//! }
52//! ```
53//!
54//! Assume for a second we only have 3 SIMD combinations:
55//! - `+avx2,+pclmulqdq,+popcnt`
56//! - `+sse2,+popcnt`
57//! - `nosimd`
58//!
59//! The above code gets expanded to (approximately):
60//!
61//! ```rust,ignore
62//! fn entry() -> Result<(), EngineError> {
63//!   let configuration = simd::configure();
64//!   {
65//!     match configuration.highest_simd() {
66//!       SimdTag::Avx2 => {
67//!         let simd = ResolvedSimd::<
68//!           quotes::avx2_64::Constructor,
69//!           structural::avx2_64::Constructor,
70//!           depth::avx2_64::Constructor,
71//!           memmem::avx2_64::Constructor,
72//!           simd::AVX2_PCLMULQDQ_POPCNT,
73//!         >::new();
74//!         run(simd)
75//!       },
//!       SimdTag::Sse2 if configuration.fast_popcnt() => {
77//!         let simd = ResolvedSimd::<
78//!           quotes::nosimd::Constructor,
79//!           structural::nosimd::Constructor,
80//!           depth::sse2_64::Constructor,
81//!           memmem::sse2_64::Constructor,
82//!           simd::SSE2_POPCNT,
83//!         >::new();
84//!         run(simd)
85//!       },
86//!       _ => {
87//!         let simd = ResolvedSimd::<
88//!           quotes::nosimd::Constructor,
89//!           structural::nosimd::Constructor,
90//!           depth::nosimd::Constructor,
91//!           memmem::nosimd::Constructor,
92//!           simd::NOSIMD,
93//!         >::new();
94//!         run(simd)
95//!       },
96//!     }
97//!   }
98//! }
99//!
100//! fn run<V: Simd>(simd: V) -> Result<(), EngineError> {
101//!   #[target_feature(enable = "avx2")]
102//!   #[target_feature(enable = "pclmulqdq")]
103//!   #[target_feature(enable = "popcnt")]
104//!   unsafe fn avx2_pclmulqdq_popcnt<V: Simd>(simd: V) -> Result<(), EngineError> {
105//!     // Actual implementation using SIMD capabilities from `simd`.
106//!   }
107//!   #[target_feature(enable = "sse2")]
108//!   #[target_feature(enable = "popcnt")]
109//!   unsafe fn sse2_popcnt<V: Simd>(simd: V) -> Result<(), EngineError> {
110//!     // Actual implementation using SIMD capabilities from `simd`.
111//!   }
112//!   unsafe fn nosimd<V: Simd>(simd: V) -> Result<(), EngineError> {
113//!     // Actual implementation using SIMD capabilities from `simd`.
114//!   }
115//!   
116//!   // SAFETY: depends on the provided SimdConfig, which cannot be incorrectly constructed.
117//!   unsafe {
118//!       match simd.dispatch_tag() {
119//!           simd::AVX2_PCLMULQDQ_POPCNT => avx2_pclmulqdq_popcnt(simd),
120//!           simd::SSE2_POPCNT => sse2_popcnt(simd),
121//!           _ => nosimd(simd),
122//!       }
123//!   }
124//! }
125//! ```
126//!
127//! Now because all of the logic in the `dispatch_simd` is done over the `V` type constants,
128//! the compiler will produce a `run` function for the three possible `ResolvedSimd` concrete
129//! types used and then constant-fold the body to produce code equivalent to this (not valid Rust code):
130//!
131//! ```rust,ignore
132//! fn run(simd: ResolvedSimd::<
133//!           quotes::avx2_64::Constructor,
134//!           structural::avx2_64::Constructor,
135//!           depth::avx2_64::Constructor,
136//!           memmem::avx2_64::Constructor,
137//!           simd::AVX2_PCLMULQDQ_POPCNT,
138//!         >) -> Result<(), EngineError> {
139//!   #[target_feature(enable = "avx2")]
140//!   #[target_feature(enable = "pclmulqdq")]
141//!   #[target_feature(enable = "popcnt")]
142//!   unsafe fn avx2_pclmulqdq_popcnt(simd: Avx2Simd = ResolvedSimd::<
143//!           quotes::avx2_64::Constructor,
144//!           structural::avx2_64::Constructor,
145//!           depth::avx2_64::Constructor,
146//!           memmem::avx2_64::Constructor,
147//!           simd::AVX2_PCLMULQDQ_POPCNT,
148//!         >) -> Result<(), EngineError> {
149//!     // Actual implementation using SIMD capabilities from `simd`.
150//!   }
151//!
152//!   unsafe { avx2_pclmulqdq_popcnt(simd) }
153//! }
154//!
155//! fn run(simd: ResolvedSimd::<
156//!           quotes::nosimd::Constructor,
157//!           structural::nosimd::Constructor,
158//!           depth::sse2_64::Constructor,
159//!           memmem::sse2_64::Constructor,
160//!           simd::SSE2_POPCNT,
161//!         >) -> Result<(), EngineError> {
162//!   #[target_feature(enable = "sse2")]
163//!   #[target_feature(enable = "popcnt")]
164//!   unsafe fn sse2_popcnt(simd: ResolvedSimd::<
165//!           quotes::nosimd::Constructor,
166//!           structural::nosimd::Constructor,
167//!           depth::sse2_64::Constructor,
168//!           memmem::sse2_64::Constructor,
169//!           simd::SSE2_POPCNT,
170//!         >) -> Result<(), EngineError> {
171//!     // Actual implementation using SIMD capabilities from `simd`.
172//!   }
173//!   
174//!   unsafe { sse2_popcnt(simd) }
175//! }
176//!
177//! fn run(simd: ResolvedSimd::<
178//!           quotes::nosimd::Constructor,
179//!           structural::nosimd::Constructor,
180//!           depth::nosimd::Constructor,
181//!           memmem::nosimd::Constructor,
182//!           simd::NOSIMD,
183//!         >) -> Result<(), EngineError> {
184//!   unsafe fn nosimd(simd: ResolvedSimd::<
185//!           quotes::nosimd::Constructor,
186//!           structural::nosimd::Constructor,
187//!           depth::nosimd::Constructor,
188//!           memmem::nosimd::Constructor,
189//!           simd::NOSIMD,
190//!         >) -> Result<(), EngineError> {
191//!     // Actual implementation using SIMD capabilities from `simd`.
192//!   }
193//!   
194//!   unsafe { nosimd(simd) }
195//! }
196//! ```
197//!
198//! The compiler is then free to optimize the inner functions fully, and the entire dispatch
199//! happens once when `entry` is called.
200//!
//! The config dispatch is done at the start of the engine, in one of the functions that run the executor.
//! The SIMD dispatch is put into the big entry points of the executor logic - `run_on_subtree`,
//! `run_head_skipping`, and `run_tail_skipping`. These are generally big enough to not be inlined by the compiler,
//! and long-running enough for that to not be an issue.
205use super::{
206    depth::{DepthImpl, DepthIterator, DepthIteratorResumeOutcome},
207    memmem::{Memmem, MemmemImpl},
208    quotes::{InnerIter, QuoteClassifiedIterator, QuotesImpl, ResumedQuoteClassifier},
209    structural::{BracketType, StructuralImpl, StructuralIterator},
210    ResumeClassifierState,
211};
212use crate::{
213    input::{Input, InputBlockIterator},
214    result::InputRecorder,
215    MaskType, BLOCK_SIZE,
216};
217use cfg_if::cfg_if;
218use std::{fmt::Display, marker::PhantomData};
219
220/// All SIMD capabilities of the engine and classifier types.
221pub(crate) trait Simd: Copy {
222    /// The implementation of [`QuoteClassifiedIterator`] of this SIMD configuration.
223    type QuotesClassifier<'i, I>: QuoteClassifiedIterator<'i, I, MaskType, BLOCK_SIZE> + InnerIter<I>
224    where
225        I: InputBlockIterator<'i, BLOCK_SIZE>;
226
227    /// The implementation of [`StructuralIterator`] of this SIMD configuration.
228    type StructuralClassifier<'i, I>: StructuralIterator<'i, I, Self::QuotesClassifier<'i, I>, MaskType, BLOCK_SIZE>
229    where
230        I: InputBlockIterator<'i, BLOCK_SIZE>;
231
232    /// The implementation of [`DepthIterator`] of this SIMD configuration.
233    type DepthClassifier<'i, I>: DepthIterator<'i, I, Self::QuotesClassifier<'i, I>, MaskType, BLOCK_SIZE>
234    where
235        I: InputBlockIterator<'i, BLOCK_SIZE>;
236
237    /// The implementation of [`Memmem`] of this SIMD configuration.
238    type MemmemClassifier<'i, 'b, 'r, I, R>: Memmem<'i, 'b, 'r, I, BLOCK_SIZE>
239    where
240        I: Input + 'i,
241        <I as Input>::BlockIterator<'i, 'r, R, BLOCK_SIZE>: 'b,
242        R: InputRecorder<<I as Input>::Block<'i, BLOCK_SIZE>> + 'r,
243        'i: 'r;
244
245    /// Get a unique descriptor of the enabled SIMD capabilities.
246    ///
247    /// The value should correspond to the `const`s defined in [`simd`](`self`),
248    /// like [`AVX2_PCLMULQDQ_POPCNT`] or [`NOSIMD`].
249    #[must_use]
250    #[allow(dead_code)] // Not used in targets that have only one possible tag (NOSIMD for non-x86 for example).
251    fn dispatch_tag(self) -> usize;
252
253    /// Walk through the JSON document given by the `iter` and classify quoted sequences.
254    #[must_use]
255    fn classify_quoted_sequences<'i, I>(self, iter: I) -> Self::QuotesClassifier<'i, I>
256    where
257        I: InputBlockIterator<'i, BLOCK_SIZE>;
258
259    /// Resume quote classification from an `iter` and, optionally, an already read
260    /// block that will be used as the first block to classify.
261    #[must_use]
262    fn resume_quote_classification<'i, I>(
263        self,
264        iter: I,
265        first_block: Option<I::Block>,
266    ) -> ResumedQuoteClassifier<Self::QuotesClassifier<'i, I>, I::Block, MaskType, BLOCK_SIZE>
267    where
268        I: InputBlockIterator<'i, BLOCK_SIZE>;
269
270    /// Walk through the JSON document quote-classified by `iter` and iterate over all
271    /// occurrences of structural characters in it.
272    #[must_use]
273    fn classify_structural_characters<'i, I>(
274        self,
275        iter: Self::QuotesClassifier<'i, I>,
276    ) -> Self::StructuralClassifier<'i, I>
277    where
278        I: InputBlockIterator<'i, BLOCK_SIZE>;
279
280    /// Resume classification using a state retrieved from a previously
281    /// used classifier via the [`stop`](StructuralIterator::stop) function.
282    #[must_use]
283    fn resume_structural_classification<'i, I>(
284        self,
285        state: ResumeClassifierState<'i, I, Self::QuotesClassifier<'i, I>, MaskType, BLOCK_SIZE>,
286    ) -> Self::StructuralClassifier<'i, I>
287    where
288        I: InputBlockIterator<'i, BLOCK_SIZE>;
289
290    /// Resume classification using a state retrieved from a previously
291    /// used classifier via the [`stop`](DepthIterator::stop) function.
292    #[must_use]
293    fn resume_depth_classification<'i, I>(
294        self,
295        state: ResumeClassifierState<'i, I, Self::QuotesClassifier<'i, I>, MaskType, BLOCK_SIZE>,
296        opening: BracketType,
297    ) -> DepthIteratorResumeOutcome<
298        'i,
299        I,
300        Self::QuotesClassifier<'i, I>,
301        Self::DepthClassifier<'i, I>,
302        MaskType,
303        BLOCK_SIZE,
304    >
305    where
306        I: InputBlockIterator<'i, BLOCK_SIZE>;
307
308    /// Create a classifier that can look for occurrences of a key in the `iter`.
309    #[must_use]
310    fn memmem<'i, 'b, 'r, I, R>(
311        self,
312        input: &'i I,
313        iter: &'b mut <I as Input>::BlockIterator<'i, 'r, R, BLOCK_SIZE>,
314    ) -> Self::MemmemClassifier<'i, 'b, 'r, I, R>
315    where
316        I: Input,
317        R: InputRecorder<<I as Input>::Block<'i, BLOCK_SIZE>>,
318        'i: 'r;
319}
320
/// Zero-sized [`Simd`] implementation fully described by its type arguments.
///
/// `Q`, `S`, `D` and `M` name the quotes, structural, depth and memmem classifier
/// constructors respectively, and `TARGET` is the value reported as the dispatch tag.
pub(crate) struct ResolvedSimd<Q, S, D, M, const TARGET: usize> {
    phantom: PhantomData<(Q, S, D, M)>,
}

impl<Q, S, D, M, const TARGET: usize> ResolvedSimd<Q, S, D, M, TARGET> {
    /// Create the value. It carries no data; all information lives in the type arguments.
    pub(crate) fn new() -> Self {
        let phantom = PhantomData;
        Self { phantom }
    }
}

// `Copy` and `Clone` are implemented by hand on purpose: deriving them would require
// `Q`, `S`, `D` and `M` to be `Copy`/`Clone` as well, even though no value of those
// types is ever stored.
impl<Q, S, D, M, const TARGET: usize> Copy for ResolvedSimd<Q, S, D, M, TARGET> {}

impl<Q, S, D, M, const TARGET: usize> Clone for ResolvedSimd<Q, S, D, M, TARGET> {
    fn clone(&self) -> Self {
        *self
    }
}
338
339impl<Q, S, D, M, const TARGET: usize> Simd for ResolvedSimd<Q, S, D, M, TARGET>
340where
341    Q: QuotesImpl,
342    S: StructuralImpl,
343    D: DepthImpl,
344    M: MemmemImpl,
345{
346    type QuotesClassifier<'i, I>
347        = Q::Classifier<'i, I>
348    where
349        I: InputBlockIterator<'i, BLOCK_SIZE>;
350
351    type StructuralClassifier<'i, I>
352        = S::Classifier<'i, I, Self::QuotesClassifier<'i, I>>
353    where
354        I: InputBlockIterator<'i, BLOCK_SIZE>;
355
356    type DepthClassifier<'i, I>
357        = D::Classifier<'i, I, Self::QuotesClassifier<'i, I>>
358    where
359        I: InputBlockIterator<'i, BLOCK_SIZE>;
360
361    type MemmemClassifier<'i, 'b, 'r, I, R>
362        = M::Classifier<'i, 'b, 'r, I, R>
363    where
364        I: Input + 'i,
365        <I as Input>::BlockIterator<'i, 'r, R, BLOCK_SIZE>: 'b,
366        R: InputRecorder<<I as Input>::Block<'i, BLOCK_SIZE>> + 'r,
367        'i: 'r;
368
369    #[inline(always)]
370    fn dispatch_tag(self) -> usize {
371        TARGET
372    }
373
374    #[inline(always)]
375    fn classify_quoted_sequences<'i, I>(self, iter: I) -> Self::QuotesClassifier<'i, I>
376    where
377        I: InputBlockIterator<'i, BLOCK_SIZE>,
378    {
379        Q::new(iter)
380    }
381
382    #[inline(always)]
383    fn resume_quote_classification<'i, I>(
384        self,
385        iter: I,
386        first_block: Option<I::Block>,
387    ) -> ResumedQuoteClassifier<Self::QuotesClassifier<'i, I>, I::Block, MaskType, BLOCK_SIZE>
388    where
389        I: InputBlockIterator<'i, BLOCK_SIZE>,
390    {
391        Q::resume(iter, first_block)
392    }
393
394    #[inline(always)]
395    fn classify_structural_characters<'i, I>(
396        self,
397        iter: Self::QuotesClassifier<'i, I>,
398    ) -> Self::StructuralClassifier<'i, I>
399    where
400        I: InputBlockIterator<'i, BLOCK_SIZE>,
401    {
402        S::new(iter)
403    }
404
405    #[inline(always)]
406    fn resume_structural_classification<'i, I>(
407        self,
408        state: ResumeClassifierState<'i, I, Self::QuotesClassifier<'i, I>, MaskType, BLOCK_SIZE>,
409    ) -> Self::StructuralClassifier<'i, I>
410    where
411        I: InputBlockIterator<'i, BLOCK_SIZE>,
412    {
413        S::resume(state)
414    }
415
416    #[inline(always)]
417    fn resume_depth_classification<'i, I>(
418        self,
419        state: ResumeClassifierState<'i, I, Self::QuotesClassifier<'i, I>, MaskType, BLOCK_SIZE>,
420        opening: BracketType,
421    ) -> DepthIteratorResumeOutcome<
422        'i,
423        I,
424        Self::QuotesClassifier<'i, I>,
425        Self::DepthClassifier<'i, I>,
426        MaskType,
427        BLOCK_SIZE,
428    >
429    where
430        I: InputBlockIterator<'i, BLOCK_SIZE>,
431    {
432        D::resume(state, opening)
433    }
434
435    #[inline(always)]
436    fn memmem<'i, 'b, 'r, I, R>(
437        self,
438        input: &'i I,
439        iter: &'b mut <I as Input>::BlockIterator<'i, 'r, R, BLOCK_SIZE>,
440    ) -> Self::MemmemClassifier<'i, 'b, 'r, I, R>
441    where
442        I: Input,
443        R: InputRecorder<<I as Input>::Block<'i, BLOCK_SIZE>>,
444        'i: 'r,
445    {
446        M::memmem(input, iter)
447    }
448}
449
/// SIMD extension recognized by rsonpath.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub(crate) enum SimdTag {
    /// No SIMD capabilities detected.
    Nosimd,
    /// SSE2 detected.
    Sse2,
    /// SSSE3 detected.
    Ssse3,
    /// AVX2 detected.
    Avx2,
}

/// Runtime-detected SIMD configuration guiding how to construct a [`Simd`] implementation for the engine.
///
/// Its textual form is `<simd>;<quotes>;<popcnt>`, e.g. `avx2;fast_quotes;fast_popcnt`.
#[derive(Debug, Clone, Copy)]
pub(crate) struct SimdConfiguration {
    /// Most capable SIMD extension to use.
    highest_simd: SimdTag,
    /// Whether the `fast_quotes` option is enabled (`slow_quotes` otherwise).
    fast_quotes: bool,
    /// Whether the `fast_popcnt` option is enabled (`slow_popcnt` otherwise).
    fast_popcnt: bool,
}

/// Name of the env variable that can be used to force a given [`SimdConfiguration`] to be used.
pub(crate) const SIMD_OVERRIDE_ENV_VARIABLE: &str = "RSONPATH_UNSAFE_FORCE_SIMD";

impl SimdConfiguration {
    /// The SIMD extension selected by this configuration.
    #[must_use]
    pub(crate) fn highest_simd(&self) -> SimdTag {
        self.highest_simd
    }

    /// Whether the `fast_quotes` option is enabled.
    #[must_use]
    pub(crate) fn fast_quotes(&self) -> bool {
        self.fast_quotes
    }

    /// Whether the `fast_popcnt` option is enabled.
    #[must_use]
    pub(crate) fn fast_popcnt(&self) -> bool {
        self.fast_popcnt
    }

    /// Parse a configuration from its `<simd>;<quotes>;<popcnt>` textual form.
    ///
    /// Matching is ASCII-case-insensitive. Returns `None` unless the input consists of
    /// exactly three `;`-separated parts, each being one of the recognized slugs:
    /// - `nosimd` / `sse2` / `ssse3` / `avx2`,
    /// - `fast_quotes` / `slow_quotes`,
    /// - `fast_popcnt` / `slow_popcnt`.
    ///
    /// No whitespace trimming is performed.
    fn try_parse(spec: &str) -> Option<Self> {
        let mut parts = spec.split(';');
        // Exactly three parts: three `Some`s followed by the end of the iterator.
        let (Some(simd_slug), Some(quotes_slug), Some(popcnt_slug), None) =
            (parts.next(), parts.next(), parts.next(), parts.next())
        else {
            return None;
        };

        let highest_simd = Self::lookup_slug(
            simd_slug,
            &[
                ("nosimd", SimdTag::Nosimd),
                ("sse2", SimdTag::Sse2),
                ("ssse3", SimdTag::Ssse3),
                ("avx2", SimdTag::Avx2),
            ],
        )?;
        let fast_quotes = Self::lookup_slug(quotes_slug, &[("fast_quotes", true), ("slow_quotes", false)])?;
        let fast_popcnt = Self::lookup_slug(popcnt_slug, &[("fast_popcnt", true), ("slow_popcnt", false)])?;

        Some(Self {
            highest_simd,
            fast_quotes,
            fast_popcnt,
        })
    }

    /// Find the value associated with `slug` in `table`, ignoring ASCII case.
    ///
    /// The names in `table` are lowercase ASCII, so this is equivalent to lowercasing
    /// `slug` and comparing exactly, but it does not allocate.
    fn lookup_slug<T: Copy>(slug: &str, table: &[(&str, T)]) -> Option<T> {
        table
            .iter()
            .find(|(name, _)| slug.eq_ignore_ascii_case(name))
            .map(|&(_, value)| value)
    }
}
523
524/// Detect available SIMD features and return the best possible [`SimdConfiguration`]
525/// for the current system.
526///
527/// # Safety
528/// If the [`SIMD_OVERRIDE_ENV_VARIABLE`] env variable is defined, it MUST be a valid SIMD
529/// configuration for the current system. Otherwise, undefined behavior will follow.
530/// For example, setting the value to enable AVX2 on a platform without AVX2 is unsound.
531///
532/// # Panics
533/// If the [`SIMD_OVERRIDE_ENV_VARIABLE`] env variable is defined and does not contain a valid
534/// SIMD configuration, an immediate panic is raised.
535#[inline]
536#[must_use]
537pub(crate) fn configure() -> SimdConfiguration {
538    if let Ok(simd) = std::env::var(SIMD_OVERRIDE_ENV_VARIABLE) {
539        #[allow(clippy::expect_used)] // This is already an unsafe override, not expected to be used by users.
540        return SimdConfiguration::try_parse(&simd).expect("invalid simd configuration override");
541    }
542
543    cfg_if! {
544        if #[cfg(not(feature = "simd"))]
545        {
546            let highest_simd = SimdTag::Nosimd;
547            let fast_quotes = false;
548            let fast_popcnt = false;
549        }
550        else if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
551        {
552            let highest_simd = if is_x86_feature_detected!("avx2") {
553                SimdTag::Avx2
554            } else if is_x86_feature_detected!("ssse3") {
555                SimdTag::Ssse3
556            } else if is_x86_feature_detected!("sse2") {
557                SimdTag::Sse2
558            } else {
559                SimdTag::Nosimd
560            };
561
562            let fast_quotes = is_x86_feature_detected!("pclmulqdq");
563            let fast_popcnt = is_x86_feature_detected!("popcnt");
564        }
565        else
566        {
567            let highest_simd = SimdTag::Nosimd;
568            let fast_quotes = false;
569            let fast_popcnt = false;
570        }
571    }
572
573    SimdConfiguration {
574        highest_simd,
575        fast_quotes,
576        fast_popcnt,
577    }
578}
579
580impl Display for SimdConfiguration {
581    #[inline]
582    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
583        let simd_slug = match self.highest_simd {
584            SimdTag::Nosimd => "nosimd",
585            SimdTag::Sse2 => "sse2",
586            SimdTag::Ssse3 => "ssse3",
587            SimdTag::Avx2 => "avx2",
588        };
589        let quote_desc = if self.fast_quotes { "fast_quotes" } else { "slow_quotes" };
590        let popcnt_desc = if self.fast_popcnt { "fast_popcnt" } else { "slow_popcnt" };
591
592        write!(f, "{simd_slug};{quote_desc};{popcnt_desc}")
593    }
594}
595
596pub(crate) const NOSIMD: usize = 0;
597
598cfg_if! {
599    if #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] {
600        pub(crate) const AVX2_PCLMULQDQ_POPCNT: usize = 1;
601        pub(crate) const SSSE3_PCLMULQDQ_POPCNT: usize = 2;
602        pub(crate) const SSSE3_PCLMULQDQ: usize = 3;
603        pub(crate) const SSSE3_POPCNT: usize = 4;
604        pub(crate) const SSSE3: usize = 5;
605        pub(crate) const SSE2_PCLMULQDQ_POPCNT: usize = 6;
606        pub(crate) const SSE2_PCLMULQDQ: usize = 7;
607        pub(crate) const SSE2_POPCNT: usize = 8;
608        pub(crate) const SSE2: usize = 9;
609
610        macro_rules! dispatch_simd {
611            ($simd:expr; $( $arg:expr ),* => fn $( $fn:tt )*) => {{
612                #[target_feature(enable = "avx2")]
613                #[target_feature(enable = "pclmulqdq")]
614                #[target_feature(enable = "popcnt")]
615                unsafe fn avx2_pclmulqdq_popcnt $($fn)*
616                #[target_feature(enable = "ssse3")]
617                #[target_feature(enable = "pclmulqdq")]
618                #[target_feature(enable = "popcnt")]
619                unsafe fn ssse3_pclmulqdq_popcnt $($fn)*
620                #[target_feature(enable = "ssse3")]
621                #[target_feature(enable = "pclmulqdq")]
622                unsafe fn ssse3_pclmulqdq $($fn)*
623                #[target_feature(enable = "ssse3")]
624                #[target_feature(enable = "popcnt")]
625                unsafe fn ssse3_popcnt $($fn)*
626                #[target_feature(enable = "ssse3")]
627                unsafe fn ssse3 $($fn)*
628                #[target_feature(enable = "sse2")]
629                #[target_feature(enable = "pclmulqdq")]
630                #[target_feature(enable = "popcnt")]
631                unsafe fn sse2_pclmulqdq_popcnt $($fn)*
632                #[target_feature(enable = "sse2")]
633                #[target_feature(enable = "pclmulqdq")]
634                unsafe fn sse2_pclmulqdq $($fn)*
635                #[target_feature(enable = "sse2")]
636                #[target_feature(enable = "popcnt")]
637                unsafe fn sse2_popcnt $($fn)*
638                #[target_feature(enable = "sse2")]
639                unsafe fn sse2 $($fn)*
640                fn nosimd $($fn)*
641
642                let simd = $simd;
643
644                // SAFETY: depends on the provided SimdConfig, which cannot be incorrectly constructed.
645                unsafe {
646                    match simd.dispatch_tag() {
647                        $crate::classification::simd::AVX2_PCLMULQDQ_POPCNT => avx2_pclmulqdq_popcnt($($arg),*),
648                        $crate::classification::simd::SSSE3_PCLMULQDQ_POPCNT => ssse3_pclmulqdq_popcnt($($arg),*),
649                        $crate::classification::simd::SSSE3_PCLMULQDQ => ssse3_pclmulqdq($($arg),*),
650                        $crate::classification::simd::SSSE3_POPCNT => ssse3_popcnt($($arg),*),
651                        $crate::classification::simd::SSSE3 => ssse3($($arg),*),
652                        $crate::classification::simd::SSE2_PCLMULQDQ_POPCNT => sse2_pclmulqdq_popcnt($($arg),*),
653                        $crate::classification::simd::SSE2_PCLMULQDQ => sse2_pclmulqdq($($arg),*),
654                        $crate::classification::simd::SSE2_POPCNT => sse2_popcnt($($arg),*),
655                        $crate::classification::simd::SSE2 => sse2($($arg),*),
656                        _ => nosimd($($arg),*),
657                    }
658                }
659            }};
660        }
661    }
662    else {
663        macro_rules! dispatch_simd {
664            ($simd:expr; $( $arg:expr ),* => fn $( $fn:tt )*) => {{
665                fn nosimd $($fn)*
666                nosimd($($arg),*)
667            }};
668        }
669    }
670}
671
672cfg_if! {
673    if #[cfg(target_arch = "x86_64")] {
674        macro_rules! config_simd {
675            ($conf:expr => |$simd:ident| $b:block) => {
676                {
677                    let conf = $conf;
678
679                    match conf.highest_simd() {
680                        // AVX2 implies all other optimizations.
681                        $crate::classification::simd::SimdTag::Avx2 => {
682                            assert!(conf.fast_quotes());
683                            assert!(conf.fast_popcnt());
684                            let $simd = $crate::classification::simd::ResolvedSimd::<
685                                $crate::classification::quotes::avx2_64::Constructor,
686                                $crate::classification::structural::avx2_64::Constructor,
687                                $crate::classification::depth::avx2_64::Constructor,
688                                $crate::classification::memmem::avx2_64::Constructor,
689                                {$crate::classification::simd::AVX2_PCLMULQDQ_POPCNT},
690                            >::new();
691                            $b
692                        }
693                        $crate::classification::simd::SimdTag::Ssse3 => {
694                            // In SSSE3 we need to check both advanced optimizations.
695                            match (conf.fast_quotes(), conf.fast_popcnt()) {
696                                (true, true) => {
697                                    let $simd = $crate::classification::simd::ResolvedSimd::<
698                                        $crate::classification::quotes::sse2_64::Constructor,
699                                        $crate::classification::structural::ssse3_64::Constructor,
700                                        $crate::classification::depth::sse2_64::Constructor,
701                                        $crate::classification::memmem::sse2_64::Constructor,
702                                        {$crate::classification::simd::SSSE3_PCLMULQDQ_POPCNT},
703                                    >::new();
704                                    $b
705                                }
706                                (true, false) => {
707                                    let $simd = $crate::classification::simd::ResolvedSimd::<
708                                        $crate::classification::quotes::sse2_64::Constructor,
709                                        $crate::classification::structural::ssse3_64::Constructor,
710                                        $crate::classification::depth::nosimd::Constructor,
711                                        $crate::classification::memmem::sse2_64::Constructor,
712                                        {$crate::classification::simd::SSSE3_PCLMULQDQ},
713                                    >::new();
714                                    $b
715                                }
716                                (false, true) => {
717                                    let $simd = $crate::classification::simd::ResolvedSimd::<
718                                        $crate::classification::quotes::nosimd::Constructor,
719                                        $crate::classification::structural::ssse3_64::Constructor,
720                                        $crate::classification::depth::sse2_64::Constructor,
721                                        $crate::classification::memmem::sse2_64::Constructor,
722                                        {$crate::classification::simd::SSSE3_POPCNT},
723                                    >::new();
724                                    $b
725                                }
726                                (false, false) => {
727                                    let $simd = $crate::classification::simd::ResolvedSimd::<
728                                        $crate::classification::quotes::nosimd::Constructor,
729                                        $crate::classification::structural::ssse3_64::Constructor,
730                                        $crate::classification::depth::nosimd::Constructor,
731                                        $crate::classification::memmem::sse2_64::Constructor,
732                                        {$crate::classification::simd::SSSE3},
733                                    >::new();
734                                    $b
735                                }
736                            }
737                        }
738                        $crate::classification::simd::SimdTag::Sse2 => {
739                            // In SSE2 we need to check both advanced optimizations,
740                            // and structural classifier is denied.
741                            match (conf.fast_quotes(), conf.fast_popcnt()) {
742                                (true, true) => {
743                                    let $simd = $crate::classification::simd::ResolvedSimd::<
744                                        $crate::classification::quotes::sse2_64::Constructor,
745                                        $crate::classification::structural::nosimd::Constructor,
746                                        $crate::classification::depth::sse2_64::Constructor,
747                                        $crate::classification::memmem::sse2_64::Constructor,
748                                        {$crate::classification::simd::SSE2_PCLMULQDQ_POPCNT},
749                                    >::new();
750                                    $b
751                                }
752                                (true, false) => {
753                                    let $simd = $crate::classification::simd::ResolvedSimd::<
754                                        $crate::classification::quotes::sse2_64::Constructor,
755                                        $crate::classification::structural::nosimd::Constructor,
756                                        $crate::classification::depth::nosimd::Constructor,
757                                        $crate::classification::memmem::sse2_64::Constructor,
758                                        {$crate::classification::simd::SSE2_PCLMULQDQ},
759                                    >::new();
760                                    $b
761                                }
762                                (false, true) => {
763                                    let $simd = $crate::classification::simd::ResolvedSimd::<
764                                        $crate::classification::quotes::nosimd::Constructor,
765                                        $crate::classification::structural::nosimd::Constructor,
766                                        $crate::classification::depth::sse2_64::Constructor,
767                                        $crate::classification::memmem::sse2_64::Constructor,
768                                        {$crate::classification::simd::SSE2_POPCNT},
769                                    >::new();
770                                    $b
771                                }
772                                (false, false) => {
773                                    let $simd = $crate::classification::simd::ResolvedSimd::<
774                                        $crate::classification::quotes::nosimd::Constructor,
775                                        $crate::classification::structural::nosimd::Constructor,
776                                        $crate::classification::depth::nosimd::Constructor,
777                                        $crate::classification::memmem::sse2_64::Constructor,
778                                        {$crate::classification::simd::SSE2},
779                                    >::new();
780                                    $b
781                                }
782                            }
783                        }
784                        // nosimd denies all optimizations.
785                        $crate::classification::simd::SimdTag::Nosimd => {
786                            let $simd = $crate::classification::simd::ResolvedSimd::<
787                                $crate::classification::quotes::nosimd::Constructor,
788                                $crate::classification::structural::nosimd::Constructor,
789                                $crate::classification::depth::nosimd::Constructor,
790                                $crate::classification::memmem::nosimd::Constructor,
791                                {$crate::classification::simd::NOSIMD}
792                            >::new();
793                            $b
794                        }
795                    }
796                }
797            };
798        }
799    }
800    else if #[cfg(target_arch = "x86")] {
801        macro_rules! config_simd {
               // Variant of `config_simd!` selected by the enclosing
               // `#[cfg(target_arch = "x86")]` branch; every SIMD constructor it
               // picks comes from a `*_32` module.
               //
               // Invocation shape: `config_simd!(conf => |simd| { ... })`.
               //   - `$conf`: expression evaluated exactly once; the expansion calls
               //     `highest_simd()`, `fast_quotes()` and `fast_popcnt()` on the result.
               //   - `$simd`: identifier bound to the chosen `ResolvedSimd` value.
               //   - `$b`: block expanded once in every arm below, so each copy sees
               //     `$simd` with a different concrete `ResolvedSimd` instantiation.
               //     The macro evaluates to the value of the copy of `$b` that runs.
               //
               // The `ResolvedSimd` arguments are, in order, the quotes, structural,
               // depth and memmem constructors, followed by a const tag.
               // Selection rules visible in the SSSE3 and SSE2 arms:
               //   - `fast_quotes()` chooses `quotes::sse2_32` over `quotes::nosimd`,
               //   - `fast_popcnt()` chooses `depth::sse2_32` over `depth::nosimd`,
               //   - the const tag suffix (`_PCLMULQDQ`, `_POPCNT`) mirrors those flags.
               //
               // Panics in the AVX2 arm if `conf` reports AVX2 while either
               // `fast_quotes()` or `fast_popcnt()` is false.
802            ($conf:expr => |$simd:ident| $b:block) => {
803                {
                       // Bind once so `$conf` is not re-evaluated by the calls below.
804                    let conf = $conf;
805
806                    match conf.highest_simd() {
807                        // AVX2 implies all other optimizations.
                           // The two asserts enforce that implication at runtime.
808                        $crate::classification::simd::SimdTag::Avx2 => {
809                            assert!(conf.fast_quotes());
810                            assert!(conf.fast_popcnt());
811                            let $simd = $crate::classification::simd::ResolvedSimd::<
812                                $crate::classification::quotes::avx2_32::Constructor,
813                                $crate::classification::structural::avx2_32::Constructor,
814                                $crate::classification::depth::avx2_32::Constructor,
815                                $crate::classification::memmem::avx2_32::Constructor,
816                                {$crate::classification::simd::AVX2_PCLMULQDQ_POPCNT},
817                            >::new();
818                            $b
819                        }
820                        $crate::classification::simd::SimdTag::Ssse3 => {
821                            // In SSSE3 we need to check both advanced optimizations.
                               // Every arm uses `structural::ssse3_32` and `memmem::sse2_32`;
                               // the tuple is (fast_quotes, fast_popcnt).
822                            match (conf.fast_quotes(), conf.fast_popcnt()) {
823                                (true, true) => {
824                                    let $simd = $crate::classification::simd::ResolvedSimd::<
825                                        $crate::classification::quotes::sse2_32::Constructor,
826                                        $crate::classification::structural::ssse3_32::Constructor,
827                                        $crate::classification::depth::sse2_32::Constructor,
828                                        $crate::classification::memmem::sse2_32::Constructor,
829                                        {$crate::classification::simd::SSSE3_PCLMULQDQ_POPCNT}
830                                    >::new();
831                                    $b
832                                }
833                                (true, false) => {
834                                    let $simd = $crate::classification::simd::ResolvedSimd::<
835                                        $crate::classification::quotes::sse2_32::Constructor,
836                                        $crate::classification::structural::ssse3_32::Constructor,
837                                        $crate::classification::depth::nosimd::Constructor,
838                                        $crate::classification::memmem::sse2_32::Constructor,
839                                        {$crate::classification::simd::SSSE3_PCLMULQDQ}
840                                    >::new();
841                                    $b
842                                }
843                                (false, true) => {
844                                    let $simd = $crate::classification::simd::ResolvedSimd::<
845                                        $crate::classification::quotes::nosimd::Constructor,
846                                        $crate::classification::structural::ssse3_32::Constructor,
847                                        $crate::classification::depth::sse2_32::Constructor,
848                                        $crate::classification::memmem::sse2_32::Constructor,
849                                        {$crate::classification::simd::SSSE3_POPCNT}
850                                    >::new();
851                                    $b
852                                }
853                                (false, false) => {
854                                    let $simd = $crate::classification::simd::ResolvedSimd::<
855                                        $crate::classification::quotes::nosimd::Constructor,
856                                        $crate::classification::structural::ssse3_32::Constructor,
857                                        $crate::classification::depth::nosimd::Constructor,
858                                        $crate::classification::memmem::sse2_32::Constructor,
859                                        {$crate::classification::simd::SSSE3}
860                                    >::new();
861                                    $b
862                                }
863                            }
864                        }
865                        $crate::classification::simd::SimdTag::Sse2 => {
866                            // In SSE2 we need to check both advanced optimizations,
867                            // and structural classifier is denied.
                               // Every arm therefore uses `structural::nosimd`, while memmem
                               // stays on `sse2_32`; the tuple is (fast_quotes, fast_popcnt).
868                            match (conf.fast_quotes(), conf.fast_popcnt()) {
869                                (true, true) => {
870                                    let $simd = $crate::classification::simd::ResolvedSimd::<
871                                        $crate::classification::quotes::sse2_32::Constructor,
872                                        $crate::classification::structural::nosimd::Constructor,
873                                        $crate::classification::depth::sse2_32::Constructor,
874                                        $crate::classification::memmem::sse2_32::Constructor,
875                                        {$crate::classification::simd::SSE2_PCLMULQDQ_POPCNT}
876                                    >::new();
877                                    $b
878                                }
879                                (true, false) => {
880                                    let $simd = $crate::classification::simd::ResolvedSimd::<
881                                        $crate::classification::quotes::sse2_32::Constructor,
882                                        $crate::classification::structural::nosimd::Constructor,
883                                        $crate::classification::depth::nosimd::Constructor,
884                                        $crate::classification::memmem::sse2_32::Constructor,
885                                        {$crate::classification::simd::SSE2_PCLMULQDQ}
886                                    >::new();
887                                    $b
888                                }
889                                (false, true) => {
890                                    let $simd = $crate::classification::simd::ResolvedSimd::<
891                                        $crate::classification::quotes::nosimd::Constructor,
892                                        $crate::classification::structural::nosimd::Constructor,
893                                        $crate::classification::depth::sse2_32::Constructor,
894                                        $crate::classification::memmem::sse2_32::Constructor,
895                                        {$crate::classification::simd::SSE2_POPCNT}
896                                    >::new();
897                                    $b
898                                }
899                                (false, false) => {
900                                    let $simd = $crate::classification::simd::ResolvedSimd::<
901                                        $crate::classification::quotes::nosimd::Constructor,
902                                        $crate::classification::structural::nosimd::Constructor,
903                                        $crate::classification::depth::nosimd::Constructor,
904                                        $crate::classification::memmem::sse2_32::Constructor,
905                                        {$crate::classification::simd::SSE2}
906                                    >::new();
907                                    $b
908                                }
909                            }
910                        }
911                        // nosimd denies all optimizations.
                           // `fast_quotes()` / `fast_popcnt()` are not consulted in this arm.
912                        $crate::classification::simd::SimdTag::Nosimd => {
913                            let $simd = $crate::classification::simd::ResolvedSimd::<
914                                $crate::classification::quotes::nosimd::Constructor,
915                                $crate::classification::structural::nosimd::Constructor,
916                                $crate::classification::depth::nosimd::Constructor,
917                                $crate::classification::memmem::nosimd::Constructor,
918                                {$crate::classification::simd::NOSIMD}
919                            >::new();
920                            $b
921                        }
922                    }
923                }
924            };
925        }
926    }
927    else {
928        macro_rules! config_simd {
               // Fallback variant of `config_simd!`, defined in the final `else`
               // branch, i.e. when none of the preceding `cfg` conditions (the one
               // directly above is `target_arch = "x86"`) holds.
               //
               // Invocation shape: `config_simd!(conf => |simd| { ... })`.
               //   - `$conf`: expression evaluated exactly once.
               //   - `$simd`: identifier bound to a `ResolvedSimd` built entirely from
               //     `nosimd` constructors and tagged with the `NOSIMD` const.
               //   - `$b`: block expanded once, after `$simd` is bound; the macro
               //     evaluates to the value of `$b`.
               //
               // Panics unless `conf.highest_simd()` equals `SimdTag::Nosimd` and both
               // `conf.fast_quotes()` and `conf.fast_popcnt()` return false.
929            ($conf:expr => |$simd:ident| $b:block) => {
930                {
931                    let conf = $conf;
                       // No SIMD implementation is wired up in this branch, so any
                       // configuration that claims otherwise is rejected up front.
932                    assert_eq!(conf.highest_simd(), $crate::classification::simd::SimdTag::Nosimd);
933                    assert!(!conf.fast_quotes());
934                    assert!(!conf.fast_popcnt());
935                    let $simd = $crate::classification::simd::ResolvedSimd::<
936                        $crate::classification::quotes::nosimd::Constructor,
937                        $crate::classification::structural::nosimd::Constructor,
938                        $crate::classification::depth::nosimd::Constructor,
939                        $crate::classification::memmem::nosimd::Constructor,
940                        {$crate::classification::simd::NOSIMD},
941                    >::new();
942                    $b
943                }
944            };
945        }
946    }
947}
948
949pub(crate) use config_simd;
950pub(crate) use dispatch_simd;