// archmage_macros/lib.rs
//! Proc-macros for archmage SIMD capability tokens.
//!
//! Provides the `#[arcane]` attribute to make raw intrinsics
//! safe via token proof.

6use proc_macro::TokenStream;
7use quote::{ToTokens, format_ident, quote, quote_spanned};
8use syn::{
9    Attribute, FnArg, GenericParam, Ident, PatType, Signature, Token, Type, TypeParamBound,
10    parse::{Parse, ParseStream},
11    parse_macro_input, parse_quote, token,
12};
13
/// A function parsed with the body left as an opaque TokenStream.
///
/// Only the signature is fully parsed into an AST — the body tokens are collected
/// without building any AST nodes (no expressions, statements, or patterns parsed).
/// This saves ~2ms per function invocation at 100 lines of code.
#[derive(Clone)]
struct LightFn {
    // Outer attributes (`#[...]`) written before the function.
    attrs: Vec<Attribute>,
    // Visibility qualifier (`pub`, `pub(crate)`, or inherited).
    vis: syn::Visibility,
    // Fully parsed signature: name, generics, inputs, output.
    sig: Signature,
    // The body's brace pair, kept so spans survive re-emission in ToTokens.
    brace_token: token::Brace,
    // Raw body tokens — never parsed into statements/expressions.
    body: proc_macro2::TokenStream,
}
27
28impl Parse for LightFn {
29    fn parse(input: ParseStream) -> syn::Result<Self> {
30        let attrs = input.call(Attribute::parse_outer)?;
31        let vis: syn::Visibility = input.parse()?;
32        let sig: Signature = input.parse()?;
33        let content;
34        let brace_token = syn::braced!(content in input);
35        let body: proc_macro2::TokenStream = content.parse()?;
36        Ok(LightFn {
37            attrs,
38            vis,
39            sig,
40            brace_token,
41            body,
42        })
43    }
44}
45
46impl ToTokens for LightFn {
47    fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
48        for attr in &self.attrs {
49            attr.to_tokens(tokens);
50        }
51        self.vis.to_tokens(tokens);
52        self.sig.to_tokens(tokens);
53        self.brace_token.surround(tokens, |tokens| {
54            self.body.to_tokens(tokens);
55        });
56    }
57}
58
59/// Filter out `#[inline]`, `#[inline(always)]`, `#[inline(never)]` from attributes.
60///
61/// Used to prevent duplicate inline attributes when the macro adds its own.
62/// Duplicate `#[inline]` is a warning that will become a hard error.
63fn filter_inline_attrs(attrs: &[Attribute]) -> Vec<&Attribute> {
64    attrs
65        .iter()
66        .filter(|attr| !attr.path().is_ident("inline"))
67        .collect()
68}
69
70/// Check if an attribute is a lint-control attribute.
71///
72/// Lint-control attributes (`#[allow(...)]`, `#[expect(...)]`, `#[deny(...)]`,
73/// `#[warn(...)]`, `#[forbid(...)]`) must be propagated to generated sibling
74/// functions so that user-applied lint suppressions work on the generated code.
75fn is_lint_attr(attr: &Attribute) -> bool {
76    let path = attr.path();
77    path.is_ident("allow")
78        || path.is_ident("expect")
79        || path.is_ident("deny")
80        || path.is_ident("warn")
81        || path.is_ident("forbid")
82}
83
84/// Extract lint-control attributes from a list of attributes.
85///
86/// Returns references to `#[allow(...)]`, `#[expect(...)]`, `#[deny(...)]`,
87/// `#[warn(...)]`, and `#[forbid(...)]` attributes. These need to be propagated
88/// to generated sibling functions so clippy/rustc lint suppressions work.
89fn filter_lint_attrs(attrs: &[Attribute]) -> Vec<&Attribute> {
90    attrs.iter().filter(|attr| is_lint_attr(attr)).collect()
91}
92
93/// Build a turbofish token stream from a function's generics.
94///
95/// Collects type and const generic parameters (skipping lifetimes) and returns
96/// a `::<A, B, N, M>` turbofish fragment. Returns empty tokens if there are no
97/// type/const generics to forward.
98///
99/// This is needed when the dispatcher or wrapper calls variant/sibling functions
100/// that have const generics not inferable from argument types alone.
101fn build_turbofish(generics: &syn::Generics) -> proc_macro2::TokenStream {
102    let params: Vec<proc_macro2::TokenStream> = generics
103        .params
104        .iter()
105        .filter_map(|param| match param {
106            GenericParam::Type(tp) => {
107                let ident = &tp.ident;
108                Some(quote! { #ident })
109            }
110            GenericParam::Const(cp) => {
111                let ident = &cp.ident;
112                Some(quote! { #ident })
113            }
114            GenericParam::Lifetime(_) => None,
115        })
116        .collect();
117    if params.is_empty() {
118        quote! {}
119    } else {
120        quote! { ::<#(#params),*> }
121    }
122}
123
124/// Replace all `Self` identifier tokens with a concrete type in a token stream.
125///
126/// Recurses into groups (braces, parens, brackets). Used for `#[arcane(_self = Type)]`
127/// to replace `Self` in both the return type and body without needing to parse the body.
128fn replace_self_in_tokens(
129    tokens: proc_macro2::TokenStream,
130    replacement: &Type,
131) -> proc_macro2::TokenStream {
132    let mut result = proc_macro2::TokenStream::new();
133    for tt in tokens {
134        match tt {
135            proc_macro2::TokenTree::Ident(ref ident) if ident == "Self" => {
136                result.extend(replacement.to_token_stream());
137            }
138            proc_macro2::TokenTree::Group(group) => {
139                let new_stream = replace_self_in_tokens(group.stream(), replacement);
140                let mut new_group = proc_macro2::Group::new(group.delimiter(), new_stream);
141                new_group.set_span(group.span());
142                result.extend(std::iter::once(proc_macro2::TokenTree::Group(new_group)));
143            }
144            other => {
145                result.extend(std::iter::once(other));
146            }
147        }
148    }
149    result
150}
151
/// Arguments to the `#[arcane]` macro.
///
/// Parsed from the attribute's argument list, e.g. `#[arcane(nested, stub)]`.
#[derive(Default)]
struct ArcaneArgs {
    /// Use `#[inline(always)]` instead of `#[inline]` for the inner function.
    /// Requires nightly Rust with `#![feature(target_feature_inline_always)]`.
    inline_always: bool,
    /// The concrete type to use for `self` receiver.
    /// When specified, `self`/`&self`/`&mut self` is transformed to `_self: Type`/`&Type`/`&mut Type`.
    /// Implies `nested = true`.
    self_type: Option<Type>,
    /// Generate an `unreachable!()` stub on the wrong architecture.
    /// Default is false (cfg-out: no function emitted on wrong arch).
    stub: bool,
    /// Use nested inner function instead of sibling function.
    /// Implied by `_self = Type`. Required for associated functions in impl blocks
    /// that have no `self` receiver (the macro can't distinguish them from free functions).
    nested: bool,
    /// Inject `use archmage::intrinsics::{arch}::*;` (includes safe memory ops).
    import_intrinsics: bool,
    /// Inject `use magetypes::simd::{ns}::*;`, `use magetypes::simd::generic::*;`,
    /// and `use magetypes::simd::backends::*;`.
    import_magetypes: bool,
}
175
176impl Parse for ArcaneArgs {
177    fn parse(input: ParseStream) -> syn::Result<Self> {
178        let mut args = ArcaneArgs::default();
179
180        while !input.is_empty() {
181            let ident: Ident = input.parse()?;
182            match ident.to_string().as_str() {
183                "inline_always" => args.inline_always = true,
184                "stub" => args.stub = true,
185                "nested" => args.nested = true,
186                "import_intrinsics" => args.import_intrinsics = true,
187                "import_magetypes" => args.import_magetypes = true,
188                "_self" => {
189                    let _: Token![=] = input.parse()?;
190                    args.self_type = Some(input.parse()?);
191                }
192                other => {
193                    return Err(syn::Error::new(
194                        ident.span(),
195                        format!("unknown arcane argument: `{}`", other),
196                    ));
197                }
198            }
199            // Consume optional comma
200            if input.peek(Token![,]) {
201                let _: Token![,] = input.parse()?;
202            }
203        }
204
205        // _self = Type implies nested (inner fn needed for Self replacement)
206        if args.self_type.is_some() {
207            args.nested = true;
208        }
209
210        Ok(args)
211    }
212}
213
214// Token-to-features and trait-to-features mappings are generated from
215// token-registry.toml by xtask. Regenerate with: cargo run -p xtask -- generate
216mod generated;
217use generated::{
218    canonical_token_to_tier_suffix, tier_to_canonical_token, token_to_arch, token_to_features,
219    token_to_magetypes_namespace, trait_to_arch, trait_to_features, trait_to_magetypes_namespace,
220};
221
/// Result of extracting token info from a type.
///
/// Produced by `extract_token_type_info`; callers resolve CPU features
/// differently for each variant (direct lookup, trait mapping, or generic
/// bound lookup).
enum TokenTypeInfo {
    /// Concrete token type (e.g., `Avx2Token`)
    Concrete(String),
    /// impl Trait with the trait names (e.g., `impl HasX64V2`)
    ImplTrait(Vec<String>),
    /// Generic type parameter name (e.g., `T`)
    Generic(String),
}
231
232/// Extract token type information from a type.
233fn extract_token_type_info(ty: &Type) -> Option<TokenTypeInfo> {
234    match ty {
235        Type::Path(type_path) => {
236            // Get the last segment of the path (e.g., "Avx2Token" from "archmage::Avx2Token")
237            type_path.path.segments.last().map(|seg| {
238                let name = seg.ident.to_string();
239                // Check if it's a known concrete token type
240                if token_to_features(&name).is_some() {
241                    TokenTypeInfo::Concrete(name)
242                } else {
243                    // Might be a generic type parameter like `T`
244                    TokenTypeInfo::Generic(name)
245                }
246            })
247        }
248        Type::Reference(type_ref) => {
249            // Handle &Token or &mut Token
250            extract_token_type_info(&type_ref.elem)
251        }
252        Type::ImplTrait(impl_trait) => {
253            // Handle `impl HasX64V2` or `impl HasX64V2 + HasNeon`
254            let traits: Vec<String> = extract_trait_names_from_bounds(&impl_trait.bounds);
255            if traits.is_empty() {
256                None
257            } else {
258                Some(TokenTypeInfo::ImplTrait(traits))
259            }
260        }
261        _ => None,
262    }
263}
264
265/// Extract trait names from type param bounds.
266fn extract_trait_names_from_bounds(
267    bounds: &syn::punctuated::Punctuated<TypeParamBound, Token![+]>,
268) -> Vec<String> {
269    bounds
270        .iter()
271        .filter_map(|bound| {
272            if let TypeParamBound::Trait(trait_bound) = bound {
273                trait_bound
274                    .path
275                    .segments
276                    .last()
277                    .map(|seg| seg.ident.to_string())
278            } else {
279                None
280            }
281        })
282        .collect()
283}
284
285/// Look up a generic type parameter in the function's generics.
286fn find_generic_bounds(sig: &Signature, type_name: &str) -> Option<Vec<String>> {
287    // Check inline bounds first (e.g., `fn foo<T: HasX64V2>(token: T)`)
288    for param in &sig.generics.params {
289        if let GenericParam::Type(type_param) = param
290            && type_param.ident == type_name
291        {
292            let traits = extract_trait_names_from_bounds(&type_param.bounds);
293            if !traits.is_empty() {
294                return Some(traits);
295            }
296        }
297    }
298
299    // Check where clause (e.g., `fn foo<T>(token: T) where T: HasX64V2`)
300    if let Some(where_clause) = &sig.generics.where_clause {
301        for predicate in &where_clause.predicates {
302            if let syn::WherePredicate::Type(pred_type) = predicate
303                && let Type::Path(type_path) = &pred_type.bounded_ty
304                && let Some(seg) = type_path.path.segments.last()
305                && seg.ident == type_name
306            {
307                let traits = extract_trait_names_from_bounds(&pred_type.bounds);
308                if !traits.is_empty() {
309                    return Some(traits);
310                }
311            }
312        }
313    }
314
315    None
316}
317
318/// Convert trait names to features, collecting all features from all traits.
319fn traits_to_features(trait_names: &[String]) -> Option<Vec<&'static str>> {
320    let mut all_features = Vec::new();
321
322    for trait_name in trait_names {
323        if let Some(features) = trait_to_features(trait_name) {
324            for &feature in features {
325                if !all_features.contains(&feature) {
326                    all_features.push(feature);
327                }
328            }
329        }
330    }
331
332    if all_features.is_empty() {
333        None
334    } else {
335        Some(all_features)
336    }
337}
338
/// Trait names that don't map to any CPU features. These are valid in the type
/// system but cannot be used as token bounds in `#[arcane]`/`#[rite]` because
/// the macros need concrete features to generate `#[target_feature]` attributes.
const FEATURELESS_TRAIT_NAMES: &[&str] = &["SimdToken", "IntoConcreteToken"];

/// Check if any trait names are featureless (no CPU feature mapping).
/// Returns the first featureless trait name found, scanning `trait_names`
/// in order.
fn find_featureless_trait(trait_names: &[String]) -> Option<&'static str> {
    trait_names.iter().find_map(|name| {
        FEATURELESS_TRAIT_NAMES
            .iter()
            .copied()
            .find(|&featureless| name == featureless)
    })
}
356
357/// Diagnose why `find_token_param` failed. Returns the name of a featureless
358/// trait if the signature has a parameter bounded by one (e.g., `SimdToken`).
359fn diagnose_featureless_token(sig: &Signature) -> Option<&'static str> {
360    for arg in &sig.inputs {
361        if let FnArg::Typed(PatType { ty, .. }) = arg
362            && let Some(info) = extract_token_type_info(ty)
363        {
364            match &info {
365                TokenTypeInfo::ImplTrait(names) => {
366                    if let Some(name) = find_featureless_trait(names) {
367                        return Some(name);
368                    }
369                }
370                TokenTypeInfo::Generic(type_name) => {
371                    // Check if the type name itself is a featureless trait
372                    // (e.g., `token: SimdToken` used as a bare path)
373                    let as_vec = vec![type_name.clone()];
374                    if let Some(name) = find_featureless_trait(&as_vec) {
375                        return Some(name);
376                    }
377                    // Check generic bounds (e.g., `T: SimdToken`)
378                    if let Some(bounds) = find_generic_bounds(sig, type_name)
379                        && let Some(name) = find_featureless_trait(&bounds)
380                    {
381                        return Some(name);
382                    }
383                }
384                TokenTypeInfo::Concrete(_) => {}
385            }
386        }
387    }
388    None
389}
390
/// Result of finding a token parameter in a function signature.
///
/// Bundles everything the expansion paths need: which parameter is the token,
/// what features/arch to target, and which magetypes namespace to import.
struct TokenParamInfo {
    /// The parameter identifier (e.g., `token`)
    ident: Ident,
    /// Target features to enable (e.g., `["avx2", "fma"]`)
    features: Vec<&'static str>,
    /// Target architecture (Some for concrete tokens, None for traits/generics)
    target_arch: Option<&'static str>,
    /// Concrete token type name (Some for concrete tokens, None for traits/generics)
    token_type_name: Option<String>,
    /// Magetypes width namespace (e.g., "v3", "neon", "wasm128")
    magetypes_namespace: Option<&'static str>,
}
404
405/// Resolve magetypes namespace from a list of trait names.
406/// Returns the first matching namespace found.
407fn traits_to_magetypes_namespace(trait_names: &[String]) -> Option<&'static str> {
408    for name in trait_names {
409        if let Some(ns) = trait_to_magetypes_namespace(name) {
410            return Some(ns);
411        }
412    }
413    None
414}
415
416/// Given trait bound names, return the first matching target architecture.
417fn traits_to_arch(trait_names: &[String]) -> Option<&'static str> {
418    for name in trait_names {
419        if let Some(arch) = trait_to_arch(name) {
420            return Some(arch);
421        }
422    }
423    None
424}
425
/// Find the first token parameter in a function signature.
///
/// Scans `sig.inputs` left to right. A parameter counts as the token only if
/// its type resolves to a non-empty feature set (concrete token, `impl Trait`
/// bound, or bounded generic) AND it has a forwardable name (plain ident or
/// wildcard `_`, for which a name is synthesized).
fn find_token_param(sig: &Signature) -> Option<TokenParamInfo> {
    for arg in &sig.inputs {
        match arg {
            FnArg::Receiver(_) => {
                // Self receivers (self, &self, &mut self) are not yet supported.
                // The macro creates an inner function, and Rust's inner functions
                // cannot have `self` parameters. Supporting this would require
                // AST rewriting to replace `self` with a regular parameter.
                // See the module docs for the workaround.
                continue;
            }
            FnArg::Typed(PatType { pat, ty, .. }) => {
                if let Some(info) = extract_token_type_info(ty) {
                    // Resolve (features, arch, concrete name, magetypes ns)
                    // differently per variant.
                    let (features, arch, token_name, mage_ns) = match info {
                        TokenTypeInfo::Concrete(ref name) => {
                            let features = token_to_features(name).map(|f| f.to_vec());
                            let arch = token_to_arch(name);
                            let ns = token_to_magetypes_namespace(name);
                            (features, arch, Some(name.clone()), ns)
                        }
                        TokenTypeInfo::ImplTrait(ref trait_names) => {
                            let ns = traits_to_magetypes_namespace(trait_names);
                            let arch = traits_to_arch(trait_names);
                            (traits_to_features(trait_names), arch, None, ns)
                        }
                        TokenTypeInfo::Generic(type_name) => {
                            // Look up the generic parameter's bounds
                            let bounds = find_generic_bounds(sig, &type_name);
                            let features = bounds.as_ref().and_then(|t| traits_to_features(t));
                            let ns = bounds
                                .as_ref()
                                .and_then(|t| traits_to_magetypes_namespace(t));
                            let arch = bounds.as_ref().and_then(|t| traits_to_arch(t));
                            (features, arch, None, ns)
                        }
                    };

                    // No features resolved → not a token parameter; keep
                    // scanning the remaining inputs.
                    if let Some(features) = features {
                        // Extract parameter name (or synthesize one for wildcard `_`)
                        let ident = match pat.as_ref() {
                            syn::Pat::Ident(pat_ident) => Some(pat_ident.ident.clone()),
                            syn::Pat::Wild(w) => {
                                Some(Ident::new("__archmage_token", w.underscore_token.span))
                            }
                            // Destructuring patterns have no single name to forward.
                            _ => None,
                        };
                        if let Some(ident) = ident {
                            return Some(TokenParamInfo {
                                ident,
                                features,
                                target_arch: arch,
                                token_type_name: token_name,
                                magetypes_namespace: mage_ns,
                            });
                        }
                    }
                }
            }
        }
    }
    None
}
489
/// Represents the kind of self receiver and the transformed parameter.
///
/// NOTE(review): no uses are visible in this portion of the file — presumably
/// consumed by the `_self = Type` receiver transformation elsewhere; confirm.
enum SelfReceiver {
    /// `self` (by value/move)
    Owned,
    /// `&self` (shared reference)
    Ref,
    /// `&mut self` (mutable reference)
    RefMut,
}
499
500/// Generate import statements to prepend to a function body.
501///
502/// Returns a `TokenStream` of `use` statements based on the import flags,
503/// target architecture, and magetypes namespace.
504fn generate_imports(
505    target_arch: Option<&str>,
506    magetypes_namespace: Option<&str>,
507    import_intrinsics: bool,
508    import_magetypes: bool,
509) -> proc_macro2::TokenStream {
510    let mut imports = proc_macro2::TokenStream::new();
511
512    if import_intrinsics && let Some(arch) = target_arch {
513        let arch_ident = format_ident!("{}", arch);
514        imports.extend(quote! {
515            #[allow(unused_imports)]
516            use archmage::intrinsics::#arch_ident::*;
517        });
518        // ScalarToken or unknown arch: import_intrinsics is a no-op
519    }
520
521    if import_magetypes && let Some(ns) = magetypes_namespace {
522        let ns_ident = format_ident!("{}", ns);
523        imports.extend(quote! {
524            #[allow(unused_imports)]
525            use magetypes::simd::#ns_ident::*;
526            #[allow(unused_imports)]
527            use magetypes::simd::backends::*;
528        });
529    }
530
531    imports
532}
533
/// Shared implementation for the `#[arcane]` macro family.
///
/// `macro_name` is used only in diagnostics, so errors name the attribute the
/// user actually wrote. Validates the signature, resolves the token parameter,
/// then dispatches to the wasm-safe, nested, or sibling expansion strategy.
fn arcane_impl(mut input_fn: LightFn, macro_name: &str, args: ArcaneArgs) -> TokenStream {
    // Check for self receiver
    let has_self_receiver = input_fn
        .sig
        .inputs
        .first()
        .map(|arg| matches!(arg, FnArg::Receiver(_)))
        .unwrap_or(false);

    // Nested mode is required when _self = Type is used (for Self replacement in nested fn).
    // In sibling mode, self/Self work naturally since both fns live in the same impl scope.
    // However, if there's a self receiver in nested mode, we still need _self = Type.
    if has_self_receiver && args.nested && args.self_type.is_none() {
        let msg = format!(
            "{} with self receiver in nested mode requires `_self = Type` argument.\n\
             Example: #[{}(nested, _self = MyType)]\n\
             Use `_self` (not `self`) in the function body to refer to self.\n\
             \n\
             Alternatively, remove `nested` to use sibling expansion (default), \
             which handles self/Self naturally.",
            macro_name, macro_name
        );
        return syn::Error::new_spanned(&input_fn.sig, msg)
            .to_compile_error()
            .into();
    }

    // Find the token parameter, its features, target arch, and token type name
    let TokenParamInfo {
        ident: _token_ident,
        features,
        target_arch,
        token_type_name,
        magetypes_namespace,
    } = match find_token_param(&input_fn.sig) {
        Some(result) => result,
        None => {
            // Check for specific misuse: featureless traits like SimdToken
            if let Some(trait_name) = diagnose_featureless_token(&input_fn.sig) {
                let msg = format!(
                    "`{trait_name}` cannot be used as a token bound in #[{macro_name}] \
                     because it doesn't specify any CPU features.\n\
                     \n\
                     #[{macro_name}] needs concrete features to generate #[target_feature]. \
                     Use a concrete token or a feature trait:\n\
                     \n\
                     Concrete tokens: X64V3Token, Desktop64, NeonToken, Arm64V2Token, ...\n\
                     Feature traits:  impl HasX64V2, impl HasNeon, impl HasArm64V3, ..."
                );
                return syn::Error::new_spanned(&input_fn.sig, msg)
                    .to_compile_error()
                    .into();
            }
            // Generic fallback: no token-like parameter found at all.
            let msg = format!(
                "{} requires a token parameter. Supported forms:\n\
                 - Concrete: `token: X64V3Token`\n\
                 - impl Trait: `token: impl HasX64V2`\n\
                 - Generic: `fn foo<T: HasX64V2>(token: T, ...)`\n\
                 - With self: `#[{}(_self = Type)] fn method(&self, token: impl HasNeon, ...)`",
                macro_name, macro_name
            );
            return syn::Error::new_spanned(&input_fn.sig, msg)
                .to_compile_error()
                .into();
        }
    };

    // Check: import_intrinsics with AVX-512 features requires the avx512 cargo feature
    // on archmage (propagated to archmage-macros). Without it, 512-bit safe memory ops
    // from safe_unaligned_simd are not available, and _mm512_loadu_ps etc. would resolve
    // to the unsafe core::arch versions (taking raw pointers instead of references).
    //
    // We check the resolved features (not the token name) so this works uniformly for
    // concrete tokens (X64V4Token), trait bounds (impl HasX64V4), and generics (T: HasX64V4).
    #[cfg(not(feature = "avx512"))]
    if args.import_intrinsics && features.iter().any(|f| f.starts_with("avx512")) {
        let token_desc = token_type_name.as_deref().unwrap_or("an AVX-512 token");
        let msg = format!(
            "Using {token_desc} with `import_intrinsics` requires the `avx512` feature.\n\
             \n\
             Add to your Cargo.toml:\n\
             \x20 archmage = {{ version = \"...\", features = [\"avx512\"] }}\n\
             \n\
             Without it, 512-bit safe memory ops (_mm512_loadu_ps etc.) are not available.\n\
             If you only need value intrinsics (no memory ops), remove `import_intrinsics`."
        );
        return syn::Error::new_spanned(&input_fn.sig, msg)
            .to_compile_error()
            .into();
    }

    // Prepend import statements to body if requested
    let body_imports = generate_imports(
        target_arch,
        magetypes_namespace,
        args.import_intrinsics,
        args.import_magetypes,
    );
    if !body_imports.is_empty() {
        let original_body = &input_fn.body;
        input_fn.body = quote! {
            #body_imports
            #original_body
        };
    }

    // Build target_feature attributes
    let target_feature_attrs: Vec<Attribute> = features
        .iter()
        .map(|feature| parse_quote!(#[target_feature(enable = #feature)]))
        .collect();

    // Rename wildcard patterns (`_: Type`) to named params so the inner/sibling call works
    let mut wild_rename_counter = 0u32;
    for arg in &mut input_fn.sig.inputs {
        if let FnArg::Typed(pat_type) = arg
            && matches!(pat_type.pat.as_ref(), syn::Pat::Wild(_))
        {
            let ident = format_ident!("__archmage_wild_{}", wild_rename_counter);
            wild_rename_counter += 1;
            *pat_type.pat = syn::Pat::Ident(syn::PatIdent {
                attrs: vec![],
                by_ref: None,
                mutability: None,
                ident,
                subpat: None,
            });
        }
    }

    // Choose inline attribute based on args
    let inline_attr: Attribute = if args.inline_always {
        parse_quote!(#[inline(always)])
    } else {
        parse_quote!(#[inline])
    };

    // On wasm32, #[target_feature(enable = "simd128")] functions are safe (Rust 1.54+).
    // The wasm validation model guarantees unsupported instructions trap deterministically,
    // so there's no UB from feature mismatch. Skip the unsafe wrapper entirely.
    if target_arch == Some("wasm32") {
        return arcane_impl_wasm_safe(
            input_fn,
            &args,
            token_type_name,
            target_feature_attrs,
            inline_attr,
        );
    }

    // Non-wasm: choose between nested-inner-fn and sibling-fn expansion.
    if args.nested {
        arcane_impl_nested(
            input_fn,
            &args,
            target_arch,
            token_type_name,
            target_feature_attrs,
            inline_attr,
        )
    } else {
        arcane_impl_sibling(
            input_fn,
            &args,
            target_arch,
            token_type_name,
            target_feature_attrs,
            inline_attr,
        )
    }
}
705
/// WASM-safe expansion: emits rite-style output (no unsafe wrapper).
///
/// On wasm32, `#[target_feature(enable = "simd128")]` is safe — the wasm validation
/// model traps deterministically on unsupported instructions, so there's no UB.
/// We emit the function directly with `#[target_feature]` + `#[inline]`, like `#[rite]`.
///
/// If `_self = Type` is set, we inject `let _self = self;` at the top of the body
/// (the function stays in impl scope, so `Self` resolves naturally — no replacement needed).
fn arcane_impl_wasm_safe(
    input_fn: LightFn,
    args: &ArcaneArgs,
    token_type_name: Option<String>,
    target_feature_attrs: Vec<Attribute>,
    inline_attr: Attribute,
) -> TokenStream {
    let vis = &input_fn.vis;
    let sig = &input_fn.sig;
    let fn_name = &sig.ident;
    let attrs = &input_fn.attrs;

    // Used in the stub's panic message; traits/generics have no concrete name.
    let token_type_str = token_type_name.as_deref().unwrap_or("UnknownToken");

    // If _self = Type is set, inject `let _self = self;` at top of body so user code
    // referencing `_self` works. The function remains in impl scope, so `Self` resolves
    // naturally — no Self replacement needed (unlike nested mode's inner fn).
    let body = if args.self_type.is_some() {
        let original_body = &input_fn.body;
        quote! {
            let _self = self;
            #original_body
        }
    } else {
        input_fn.body.clone()
    };

    // Prepend target_feature + inline attrs, filtering user #[inline] to avoid duplicates
    let mut new_attrs = target_feature_attrs;
    new_attrs.push(inline_attr);
    for attr in filter_inline_attrs(attrs) {
        new_attrs.push(attr.clone());
    }

    // Optional non-wasm stub: panics if somehow reached on the wrong arch.
    let stub = if args.stub {
        // Build stub args for suppressing unused-variable warnings
        let stub_args: Vec<proc_macro2::TokenStream> = sig
            .inputs
            .iter()
            .filter_map(|arg| match arg {
                FnArg::Typed(pat_type) => {
                    if let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref() {
                        let ident = &pat_ident.ident;
                        Some(quote!(#ident))
                    } else {
                        None
                    }
                }
                FnArg::Receiver(_) => None,
            })
            .collect();

        quote! {
            #[cfg(not(target_arch = "wasm32"))]
            #vis #sig {
                let _ = (#(#stub_args),*);
                unreachable!(
                    "BUG: {}() was called but requires {} (target_arch = \"wasm32\"). \
                     {}::summon() returns None on this architecture, so this function \
                     is unreachable in safe code. If you used forge_token_dangerously(), \
                     that is the bug.",
                    stringify!(#fn_name),
                    #token_type_str,
                    #token_type_str,
                )
            }
        }
    } else {
        quote! {}
    };

    // Emit the real (wasm-only) function plus the optional stub.
    let expanded = quote! {
        #[cfg(target_arch = "wasm32")]
        #(#new_attrs)*
        #vis #sig {
            #body
        }

        #stub
    };

    expanded.into()
}
797
798/// Sibling expansion (default): generates two functions at the same scope level.
799///
800/// ```ignore
801/// // #[arcane] fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { body }
802/// // expands to:
803/// #[cfg(target_arch = "x86_64")]
804/// #[doc(hidden)]
805/// #[target_feature(enable = "avx2,fma,...")]
806/// #[inline]
807/// fn __arcane_process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { body }
808///
809/// #[cfg(target_arch = "x86_64")]
810/// fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] {
811///     unsafe { __arcane_process(token, data) }
812/// }
813/// ```
814///
815/// The sibling function is safe (Rust 2024 edition allows safe `#[target_feature]`
816/// functions). Only the call from the wrapper needs `unsafe` because the wrapper
817/// lacks matching target features. Compatible with `#![forbid(unsafe_code)]`.
818///
819/// Self/self work naturally since both functions live in the same impl scope.
fn arcane_impl_sibling(
    input_fn: LightFn,
    args: &ArcaneArgs,
    target_arch: Option<&str>,
    token_type_name: Option<String>,
    target_feature_attrs: Vec<Attribute>,
    inline_attr: Attribute,
) -> TokenStream {
    // Destructure the parsed function once; everything below borrows from it.
    let vis = &input_fn.vis;
    let sig = &input_fn.sig;
    let fn_name = &sig.ident;
    let generics = &sig.generics;
    let where_clause = &generics.where_clause;
    let inputs = &sig.inputs;
    let output = &sig.output;
    let body = &input_fn.body;
    // Filter out user #[inline] attrs to avoid duplicates (will become a hard error).
    // The wrapper gets #[inline(always)] unconditionally — it's a trivial unsafe { sibling() }.
    let attrs = filter_inline_attrs(&input_fn.attrs);
    // Lint-control attrs (#[allow(...)], #[expect(...)], etc.) must also go on the sibling,
    // because the sibling has the same parameters and clippy lints it independently.
    let lint_attrs = filter_lint_attrs(&input_fn.attrs);

    // The hidden #[target_feature] twin: `process` -> `__arcane_process`.
    let sibling_name = format_ident!("__arcane_{}", fn_name);

    // Detect self receiver
    let has_self_receiver = inputs
        .first()
        .map(|arg| matches!(arg, FnArg::Receiver(_)))
        .unwrap_or(false);

    // Build sibling signature: same as original but with sibling name, #[doc(hidden)]
    // NOT unsafe — Rust 2024 edition allows safe #[target_feature] functions.
    // Only the call from non-matching context (the wrapper) needs unsafe.
    let sibling_sig_inputs = inputs;

    // Build turbofish for forwarding type/const generic params to sibling
    let turbofish = build_turbofish(generics);

    // Build the call from wrapper to sibling
    let sibling_call = if has_self_receiver {
        // Method: self.__arcane_fn::<T, N>(other_args...)
        let other_args: Vec<proc_macro2::TokenStream> = inputs
            .iter()
            .skip(1) // skip self receiver
            .filter_map(|arg| {
                if let FnArg::Typed(pat_type) = arg
                    && let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref()
                {
                    let ident = &pat_ident.ident;
                    Some(quote!(#ident))
                } else {
                    // NOTE(review): non-identifier patterns (e.g. destructuring such as
                    // `(a, b): (i32, i32)`) are silently skipped here, so they would not
                    // be forwarded to the sibling and the expansion would not compile —
                    // presumably rejected or unsupported upstream; confirm.
                    None
                }
            })
            .collect();
        quote! { self.#sibling_name #turbofish(#(#other_args),*) }
    } else {
        // Free function: __arcane_fn::<T, N>(all_args...)
        let all_args: Vec<proc_macro2::TokenStream> = inputs
            .iter()
            .filter_map(|arg| {
                if let FnArg::Typed(pat_type) = arg
                    && let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref()
                {
                    let ident = &pat_ident.ident;
                    Some(quote!(#ident))
                } else {
                    // NOTE(review): same non-identifier-pattern caveat as the method arm.
                    None
                }
            })
            .collect();
        quote! { #sibling_name #turbofish(#(#all_args),*) }
    };

    // Build stub args for suppressing unused warnings
    let stub_args: Vec<proc_macro2::TokenStream> = inputs
        .iter()
        .filter_map(|arg| match arg {
            FnArg::Typed(pat_type) => {
                if let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref() {
                    let ident = &pat_ident.ident;
                    Some(quote!(#ident))
                } else {
                    None
                }
            }
            FnArg::Receiver(_) => None, // self doesn't need _ = suppression
        })
        .collect();

    // Token name used purely for the diagnostic message in the stub.
    let token_type_str = token_type_name.as_deref().unwrap_or("UnknownToken");

    let expanded = if let Some(arch) = target_arch {
        // Sibling function: #[doc(hidden)] #[target_feature] fn __arcane_fn(...)
        // Always private — only the wrapper is user-visible.
        // Safe declaration — Rust 2024 allows safe #[target_feature] functions.
        let sibling_fn = quote! {
            #[cfg(target_arch = #arch)]
            #[doc(hidden)]
            #(#lint_attrs)*
            #(#target_feature_attrs)*
            #inline_attr
            fn #sibling_name #generics (#sibling_sig_inputs) #output #where_clause {
                #body
            }
        };

        // Wrapper function: fn original_name(...) { unsafe { sibling_call } }
        // The unsafe block is needed because the sibling has #[target_feature] and
        // the wrapper doesn't — calling across this boundary requires unsafe.
        let wrapper_fn = quote! {
            #[cfg(target_arch = #arch)]
            #(#attrs)*
            #[inline(always)]
            #vis #sig {
                // SAFETY: The token parameter proves the required CPU features are available.
                // Calling a #[target_feature] function from a non-matching context requires
                // unsafe because the CPU may not support those instructions. The token's
                // existence proves summon() succeeded, so the features are available.
                unsafe { #sibling_call }
            }
        };

        // Optional stub for other architectures
        let stub = if args.stub {
            quote! {
                #[cfg(not(target_arch = #arch))]
                #(#attrs)*
                #vis #sig {
                    let _ = (#(#stub_args),*);
                    unreachable!(
                        "BUG: {}() was called but requires {} (target_arch = \"{}\"). \
                         {}::summon() returns None on this architecture, so this function \
                         is unreachable in safe code. If you used forge_token_dangerously(), \
                         that is the bug.",
                        stringify!(#fn_name),
                        #token_type_str,
                        #arch,
                        #token_type_str,
                    )
                }
            }
        } else {
            quote! {}
        };

        quote! {
            #sibling_fn
            #wrapper_fn
            #stub
        }
    } else {
        // No specific arch (trait bounds or generic) - no cfg guards, no stub needed.
        // Still use sibling pattern for consistency. Sibling is always private.
        let sibling_fn = quote! {
            #[doc(hidden)]
            #(#lint_attrs)*
            #(#target_feature_attrs)*
            #inline_attr
            fn #sibling_name #generics (#sibling_sig_inputs) #output #where_clause {
                #body
            }
        };

        let wrapper_fn = quote! {
            #(#attrs)*
            #[inline(always)]
            #vis #sig {
                // SAFETY: The token proves the required CPU features are available.
                unsafe { #sibling_call }
            }
        };

        quote! {
            #sibling_fn
            #wrapper_fn
        }
    };

    expanded.into()
}
1002
1003/// Nested inner function expansion (opt-in via `nested` or `_self = Type`).
1004///
1005/// This is the original approach: generates a nested inner function inside the
1006/// original function. Required when `_self = Type` is used because Self must be
1007/// replaced in the nested function (where it's not in scope).
fn arcane_impl_nested(
    input_fn: LightFn,
    args: &ArcaneArgs,
    target_arch: Option<&str>,
    token_type_name: Option<String>,
    target_feature_attrs: Vec<Attribute>,
    inline_attr: Attribute,
) -> TokenStream {
    // Destructure the parsed function once; everything below borrows from it.
    let vis = &input_fn.vis;
    let sig = &input_fn.sig;
    let fn_name = &sig.ident;
    let generics = &sig.generics;
    let where_clause = &generics.where_clause;
    let inputs = &sig.inputs;
    let output = &sig.output;
    let body = &input_fn.body;
    // Filter out user #[inline] attrs to avoid duplicates (will become a hard error).
    let attrs = filter_inline_attrs(&input_fn.attrs);
    // Propagate lint attrs to inner function (same issue as sibling mode — #17)
    let lint_attrs = filter_lint_attrs(&input_fn.attrs);

    // Determine self receiver type if present
    let self_receiver_kind: Option<SelfReceiver> = inputs.first().and_then(|arg| match arg {
        FnArg::Receiver(receiver) => {
            // `self` (no `&`) is owned; `&mut self` vs `&self` split on mutability.
            if receiver.reference.is_none() {
                Some(SelfReceiver::Owned)
            } else if receiver.mutability.is_some() {
                Some(SelfReceiver::RefMut)
            } else {
                Some(SelfReceiver::Ref)
            }
        }
        _ => None,
    });

    // Build inner function parameters, transforming self if needed.
    // Also replace Self in non-self parameter types when _self = Type is set,
    // since the inner function is a nested fn where Self from the impl is not in scope.
    let inner_params: Vec<proc_macro2::TokenStream> = inputs
        .iter()
        .map(|arg| match arg {
            FnArg::Receiver(_) => {
                // Transform self receiver to _self parameter
                // NOTE(review): unwrap() panics if nested mode sees a `self` receiver
                // without `_self = Type` — presumably validated by the caller before
                // dispatching here; confirm.
                let self_ty = args.self_type.as_ref().unwrap();
                match self_receiver_kind.as_ref().unwrap() {
                    SelfReceiver::Owned => quote!(_self: #self_ty),
                    SelfReceiver::Ref => quote!(_self: &#self_ty),
                    SelfReceiver::RefMut => quote!(_self: &mut #self_ty),
                }
            }
            FnArg::Typed(pat_type) => {
                if let Some(ref self_ty) = args.self_type {
                    replace_self_in_tokens(quote!(#pat_type), self_ty)
                } else {
                    quote!(#pat_type)
                }
            }
        })
        .collect();

    // Build inner function call arguments
    let inner_args: Vec<proc_macro2::TokenStream> = inputs
        .iter()
        .filter_map(|arg| match arg {
            FnArg::Typed(pat_type) => {
                if let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref() {
                    let ident = &pat_ident.ident;
                    Some(quote!(#ident))
                } else {
                    // Non-identifier patterns are skipped (same caveat as sibling mode).
                    None
                }
            }
            FnArg::Receiver(_) => Some(quote!(self)), // Pass self to inner as _self
        })
        .collect();

    let inner_fn_name = format_ident!("__simd_inner_{}", fn_name);

    // Build turbofish for forwarding type/const generic params to inner function
    let turbofish = build_turbofish(generics);

    // Transform output, body, and where clause to replace Self with concrete type if needed.
    let (inner_output, inner_body, inner_where_clause): (
        proc_macro2::TokenStream,
        proc_macro2::TokenStream,
        proc_macro2::TokenStream,
    ) = if let Some(ref self_ty) = args.self_type {
        let transformed_output = replace_self_in_tokens(output.to_token_stream(), self_ty);
        let transformed_body = replace_self_in_tokens(body.clone(), self_ty);
        let transformed_where = where_clause
            .as_ref()
            .map(|wc| replace_self_in_tokens(wc.to_token_stream(), self_ty))
            .unwrap_or_default();
        (transformed_output, transformed_body, transformed_where)
    } else {
        (
            output.to_token_stream(),
            body.clone(),
            where_clause
                .as_ref()
                .map(|wc| wc.to_token_stream())
                .unwrap_or_default(),
        )
    };

    // Token name used purely for the diagnostic message in the stub.
    let token_type_str = token_type_name.as_deref().unwrap_or("UnknownToken");
    let expanded = if let Some(arch) = target_arch {
        let stub = if args.stub {
            quote! {
                // Stub for other architectures - the token cannot be obtained
                #[cfg(not(target_arch = #arch))]
                #(#attrs)*
                #vis #sig {
                    // NOTE(review): unlike sibling mode, inner_args includes `self` here,
                    // so the suppression tuple contains it; harmless, but inconsistent.
                    let _ = (#(#inner_args),*);
                    unreachable!(
                        "BUG: {}() was called but requires {} (target_arch = \"{}\"). \
                         {}::summon() returns None on this architecture, so this function \
                         is unreachable in safe code. If you used forge_token_dangerously(), \
                         that is the bug.",
                        stringify!(#fn_name),
                        #token_type_str,
                        #arch,
                        #token_type_str,
                    )
                }
            }
        } else {
            quote! {}
        };

        quote! {
            // Real implementation for the correct architecture
            #[cfg(target_arch = #arch)]
            #(#attrs)*
            #[inline(always)]
            #vis #sig {
                #(#target_feature_attrs)*
                #inline_attr
                #(#lint_attrs)*
                fn #inner_fn_name #generics (#(#inner_params),*) #inner_output #inner_where_clause {
                    #inner_body
                }

                // SAFETY: The token parameter proves the required CPU features are available.
                unsafe { #inner_fn_name #turbofish(#(#inner_args),*) }
            }

            #stub
        }
    } else {
        // No specific arch (trait bounds or generic) - generate without cfg guards
        quote! {
            #(#attrs)*
            #[inline(always)]
            #vis #sig {
                #(#target_feature_attrs)*
                #inline_attr
                #(#lint_attrs)*
                fn #inner_fn_name #generics (#(#inner_params),*) #inner_output #inner_where_clause {
                    #inner_body
                }

                // SAFETY: The token proves the required CPU features are available.
                unsafe { #inner_fn_name #turbofish(#(#inner_args),*) }
            }
        }
    };

    expanded.into()
}
1178
1179/// Mark a function as an arcane SIMD function.
1180///
1181/// This macro generates a safe wrapper around a `#[target_feature]` function.
1182/// The token parameter type determines which CPU features are enabled.
1183///
1184/// # Expansion Modes
1185///
1186/// ## Sibling (default)
1187///
1188/// Generates two functions at the same scope: a safe `#[target_feature]` sibling
1189/// and a safe wrapper. `self`/`Self` work naturally since both functions share scope.
1190/// Compatible with `#![forbid(unsafe_code)]`.
1191///
1192/// ```ignore
1193/// #[arcane]
1194/// fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { /* body */ }
1195/// // Expands to (x86_64 only):
1196/// #[cfg(target_arch = "x86_64")]
1197/// #[doc(hidden)]
1198/// #[target_feature(enable = "avx2,fma,...")]
1199/// fn __arcane_process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { /* body */ }
1200///
1201/// #[cfg(target_arch = "x86_64")]
1202/// fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] {
1203///     unsafe { __arcane_process(token, data) }
1204/// }
1205/// ```
1206///
1207/// Methods work naturally:
1208///
1209/// ```ignore
1210/// impl MyType {
1211///     #[arcane]
1212///     fn compute(&self, token: X64V3Token) -> f32 {
1213///         self.data.iter().sum()  // self/Self just work!
1214///     }
1215/// }
1216/// ```
1217///
1218/// ## Nested (`nested` or `_self = Type`)
1219///
1220/// Generates a nested inner function inside the original. Required for trait impls
1221/// (where sibling functions would fail) and when `_self = Type` is used.
1222///
1223/// ```ignore
1224/// impl SimdOps for MyType {
1225///     #[arcane(_self = MyType)]
1226///     fn compute(&self, token: X64V3Token) -> Self {
1227///         // Use _self instead of self, Self replaced with MyType
1228///         _self.data.iter().sum()
1229///     }
1230/// }
1231/// ```
1232///
1233/// # Cross-Architecture Behavior
1234///
1235/// **Default (cfg-out):** On the wrong architecture, the function is not emitted
1236/// at all — no stub, no dead code. Code that references it must be cfg-gated.
1237///
1238/// **With `stub`:** Generates an `unreachable!()` stub on wrong architectures.
1239/// Use when cross-arch dispatch references the function without cfg guards.
1240///
1241/// ```ignore
1242/// #[arcane(stub)]  // generates stub on wrong arch
1243/// fn process_neon(token: NeonToken, data: &[f32]) -> f32 { ... }
1244/// ```
1245///
1246/// `incant!` is unaffected — it already cfg-gates dispatch calls by architecture.
1247///
1248/// # Token Parameter Forms
1249///
1250/// ```ignore
1251/// // Concrete token
1252/// #[arcane]
1253/// fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { ... }
1254///
1255/// // impl Trait bound
1256/// #[arcane]
1257/// fn process(token: impl HasX64V2, data: &[f32; 8]) -> [f32; 8] { ... }
1258///
1259/// // Generic with inline or where-clause bounds
1260/// #[arcane]
1261/// fn process<T: HasX64V2>(token: T, data: &[f32; 8]) -> [f32; 8] { ... }
1262///
1263/// // Wildcard
1264/// #[arcane]
1265/// fn process(_: X64V3Token, data: &[f32; 8]) -> [f32; 8] { ... }
1266/// ```
1267///
1268/// # Options
1269///
1270/// | Option | Effect |
1271/// |--------|--------|
1272/// | `stub` | Generate `unreachable!()` stub on wrong architecture |
1273/// | `nested` | Use nested inner function instead of sibling |
1274/// | `_self = Type` | Implies `nested`, transforms self receiver, replaces Self |
1275/// | `inline_always` | Use `#[inline(always)]` (requires nightly) |
1276/// | `import_intrinsics` | Auto-import `archmage::intrinsics::{arch}::*` (includes safe memory ops) |
1277/// | `import_magetypes` | Auto-import `magetypes::simd::{ns}::*` and `magetypes::simd::backends::*` |
1278///
1279/// ## Auto-Imports
1280///
1281/// `import_intrinsics` and `import_magetypes` inject `use` statements into the
1282/// function body, eliminating boilerplate. The macro derives the architecture and
1283/// namespace from the token type:
1284///
1285/// ```ignore
1286/// // Without auto-imports — lots of boilerplate:
1287/// use std::arch::x86_64::*;
1288/// use magetypes::simd::v3::*;
1289///
1290/// #[arcane]
1291/// fn process(token: X64V3Token, data: &[f32; 8]) -> f32 {
1292///     let v = f32x8::load(token, data);
1293///     let zero = _mm256_setzero_ps();
1294///     // ...
1295/// }
1296///
1297/// // With auto-imports — clean:
1298/// #[arcane(import_intrinsics, import_magetypes)]
1299/// fn process(token: X64V3Token, data: &[f32; 8]) -> f32 {
1300///     let v = f32x8::load(token, data);
1301///     let zero = _mm256_setzero_ps();
1302///     // ...
1303/// }
1304/// ```
1305///
1306/// The namespace mapping is token-driven:
1307///
1308/// | Token | `import_intrinsics` | `import_magetypes` |
1309/// |-------|--------------------|--------------------|
1310/// | `X64V1..V3Token` | `archmage::intrinsics::x86_64::*` | `magetypes::simd::v3::*` |
1311/// | `X64V4Token` | `archmage::intrinsics::x86_64::*` | `magetypes::simd::v4::*` |
1312/// | `X64V4xToken` | `archmage::intrinsics::x86_64::*` | `magetypes::simd::v4x::*` |
1313/// | `NeonToken` / ARM | `archmage::intrinsics::aarch64::*` | `magetypes::simd::neon::*` |
1314/// | `Wasm128Token` | `archmage::intrinsics::wasm32::*` | `magetypes::simd::wasm128::*` |
1315///
1316/// Works with concrete tokens, `impl Trait` bounds, and generic parameters.
1317///
1318/// # Supported Tokens
1319///
1320/// - **x86_64**: `X64V2Token`, `X64V3Token`/`Desktop64`, `X64V4Token`/`Avx512Token`/`Server64`,
1321///   `X64V4xToken`, `Avx512Fp16Token`, `X64CryptoToken`, `X64V3CryptoToken`
1322/// - **ARM**: `NeonToken`/`Arm64`, `Arm64V2Token`, `Arm64V3Token`,
1323///   `NeonAesToken`, `NeonSha3Token`, `NeonCrcToken`
1324/// - **WASM**: `Wasm128Token`
1325///
1326/// # Supported Trait Bounds
1327///
1328/// `HasX64V2`, `HasX64V4`, `HasNeon`, `HasNeonAes`, `HasNeonSha3`, `HasArm64V2`, `HasArm64V3`
1329///
1330/// ```ignore
1331/// #![feature(target_feature_inline_always)]
1332///
1333/// #[arcane(inline_always)]
1334/// fn fast_kernel(token: Avx2Token, data: &mut [f32]) {
1335///     // Inner function will use #[inline(always)]
1336/// }
1337/// ```
1338#[proc_macro_attribute]
1339pub fn arcane(attr: TokenStream, item: TokenStream) -> TokenStream {
1340    let args = parse_macro_input!(attr as ArcaneArgs);
1341    let input_fn = parse_macro_input!(item as LightFn);
1342    arcane_impl(input_fn, "arcane", args)
1343}
1344
1345/// Legacy alias for [`arcane`].
1346///
1347/// **Deprecated:** Use `#[arcane]` instead. This alias exists only for migration.
1348#[proc_macro_attribute]
1349#[doc(hidden)]
1350pub fn simd_fn(attr: TokenStream, item: TokenStream) -> TokenStream {
1351    let args = parse_macro_input!(attr as ArcaneArgs);
1352    let input_fn = parse_macro_input!(item as LightFn);
1353    arcane_impl(input_fn, "simd_fn", args)
1354}
1355
1356/// Descriptive alias for [`arcane`].
1357///
1358/// Generates a safe wrapper around a `#[target_feature]` inner function.
1359/// The token type in your signature determines which CPU features are enabled.
1360/// Creates an LLVM optimization boundary — use [`token_target_features`]
1361/// (alias for [`rite`]) for inner helpers to avoid this.
1362///
1363/// Since Rust 1.85, value-based SIMD intrinsics are safe inside
1364/// `#[target_feature]` functions. This macro generates the `#[target_feature]`
1365/// wrapper so you never need to write `unsafe` for SIMD code.
1366///
1367/// See [`arcane`] for full documentation and examples.
1368#[proc_macro_attribute]
1369pub fn token_target_features_boundary(attr: TokenStream, item: TokenStream) -> TokenStream {
1370    let args = parse_macro_input!(attr as ArcaneArgs);
1371    let input_fn = parse_macro_input!(item as LightFn);
1372    arcane_impl(input_fn, "token_target_features_boundary", args)
1373}
1374
1375// ============================================================================
1376// Rite macro for inner SIMD functions (inlines into matching #[target_feature] callers)
1377// ============================================================================
1378
1379/// Annotate inner SIMD helpers called from `#[arcane]` functions.
1380///
1381/// Unlike `#[arcane]`, which creates an inner `#[target_feature]` function behind
1382/// a safe boundary, `#[rite]` adds `#[target_feature]` and `#[inline]` directly.
1383/// LLVM inlines it into any caller with matching features — no boundary crossing.
1384///
1385/// # Three Modes
1386///
1387/// **Token-based:** Reads the token type from the function signature.
1388/// ```ignore
1389/// #[rite]
1390/// fn helper(_: X64V3Token, v: __m256) -> __m256 { _mm256_add_ps(v, v) }
1391/// ```
1392///
1393/// **Tier-based:** Specify the tier name directly, no token parameter needed.
1394/// ```ignore
1395/// #[rite(v3)]
1396/// fn helper(v: __m256) -> __m256 { _mm256_add_ps(v, v) }
1397/// ```
1398///
1399/// Both produce identical code. The token form can be easier to remember if
1400/// you already have the token in scope.
1401///
1402/// **Multi-tier:** Specify multiple tiers to generate suffixed variants.
1403/// ```ignore
1404/// #[rite(v3, v4)]
1405/// fn process(data: &[f32; 4]) -> f32 { data.iter().sum() }
1406/// // Generates: process_v3() and process_v4()
1407/// ```
1408///
1409/// Each variant gets its own `#[target_feature]` and `#[cfg(target_arch)]`.
1410/// Since Rust 1.85, calling these from a matching `#[arcane]` or `#[rite]`
1411/// context is safe — no `unsafe` needed when the caller has matching or
1412/// superset features.
1413///
1414/// # Safety
1415///
1416/// `#[rite]` functions can only be safely called from contexts where the
1417/// required CPU features are enabled:
1418/// - From within `#[arcane]` functions with matching/superset tokens
1419/// - From within other `#[rite]` functions with matching/superset tokens
1420/// - From code compiled with `-Ctarget-cpu` that enables the features
1421///
1422/// Calling from other contexts requires `unsafe` and the caller must ensure
1423/// the CPU supports the required features.
1424///
1425/// # Cross-Architecture Behavior
1426///
1427/// Like `#[arcane]`, defaults to cfg-out (no function on wrong arch).
1428/// Use `#[rite(stub)]` to generate an unreachable stub instead.
1429///
1430/// # Options
1431///
1432/// | Option | Effect |
1433/// |--------|--------|
1434/// | tier name(s) | `v3`, `neon`, etc. One = single function; multiple = suffixed variants |
1435/// | `stub` | Generate `unreachable!()` stub on wrong architecture |
1436/// | `import_intrinsics` | Auto-import `archmage::intrinsics::{arch}::*` (includes safe memory ops) |
1437/// | `import_magetypes` | Auto-import `magetypes::simd::{ns}::*` and `magetypes::simd::backends::*` |
1438///
1439/// See `#[arcane]` docs for the full namespace mapping table.
1440///
1441/// # Comparison with #[arcane]
1442///
1443/// | Aspect | `#[arcane]` | `#[rite]` |
1444/// |--------|-------------|-----------|
1445/// | Creates wrapper | Yes | No |
1446/// | Entry point | Yes | No |
1447/// | Inlines into caller | No (barrier) | Yes |
1448/// | Safe to call anywhere | Yes (with token) | Only from feature-enabled context |
1449/// | Multi-tier variants | No | Yes (`#[rite(v3, v4, neon)]`) |
1450/// | `stub` param | Yes | Yes |
1451/// | `import_intrinsics` | Yes | Yes |
1452/// | `import_magetypes` | Yes | Yes |
1453#[proc_macro_attribute]
1454pub fn rite(attr: TokenStream, item: TokenStream) -> TokenStream {
1455    let args = parse_macro_input!(attr as RiteArgs);
1456    let input_fn = parse_macro_input!(item as LightFn);
1457    rite_impl(input_fn, args)
1458}
1459
1460/// Descriptive alias for [`rite`].
1461///
1462/// Applies `#[target_feature]` + `#[inline]` based on the token type in your
1463/// function signature. No wrapper, no optimization boundary. Use for functions
1464/// called from within `#[arcane]`/`#[token_target_features_boundary]` code.
1465///
1466/// Since Rust 1.85, calling a `#[target_feature]` function from another function
1467/// with matching features is safe — no `unsafe` needed.
1468///
1469/// See [`rite`] for full documentation and examples.
1470#[proc_macro_attribute]
1471pub fn token_target_features(attr: TokenStream, item: TokenStream) -> TokenStream {
1472    let args = parse_macro_input!(attr as RiteArgs);
1473    let input_fn = parse_macro_input!(item as LightFn);
1474    rite_impl(input_fn, args)
1475}
1476
/// Arguments for the `#[rite]` macro.
///
/// Populated by the `Parse` impl below; `Default` gives the all-false/empty state
/// used as the parse starting point.
#[derive(Default)]
struct RiteArgs {
    /// Generate an `unreachable!()` stub on the wrong architecture.
    /// Default is false (cfg-out: no function emitted on wrong arch).
    stub: bool,
    /// Inject `use archmage::intrinsics::{arch}::*;` (includes safe memory ops).
    import_intrinsics: bool,
    /// Inject `use magetypes::simd::{ns}::*;`, `use magetypes::simd::generic::*;`,
    /// and `use magetypes::simd::backends::*;`.
    import_magetypes: bool,
    /// Tiers specified directly (e.g., `#[rite(v3)]` or `#[rite(v3, v4, neon)]`).
    /// Stored as canonical token names (e.g., "X64V3Token").
    /// Single tier: generates one function (no suffix, no token parameter needed).
    /// Multiple tiers: generates suffixed variants (e.g., `fn_v3`, `fn_v4`, `fn_neon`).
    tier_tokens: Vec<String>,
}
1494
1495impl Parse for RiteArgs {
1496    fn parse(input: ParseStream) -> syn::Result<Self> {
1497        let mut args = RiteArgs::default();
1498
1499        while !input.is_empty() {
1500            let ident: Ident = input.parse()?;
1501            match ident.to_string().as_str() {
1502                "stub" => args.stub = true,
1503                "import_intrinsics" => args.import_intrinsics = true,
1504                "import_magetypes" => args.import_magetypes = true,
1505                other => {
1506                    if let Some(canonical) = tier_to_canonical_token(other) {
1507                        args.tier_tokens.push(String::from(canonical));
1508                    } else {
1509                        return Err(syn::Error::new(
1510                            ident.span(),
1511                            format!(
1512                                "unknown rite argument: `{}`. Supported: tier names \
1513                                 (v1, v2, v3, v4, neon, arm_v2, wasm128, ...), \
1514                                 `stub`, `import_intrinsics`, `import_magetypes`.",
1515                                other
1516                            ),
1517                        ));
1518                    }
1519                }
1520            }
1521            if input.peek(Token![,]) {
1522                let _: Token![,] = input.parse()?;
1523            }
1524        }
1525
1526        Ok(args)
1527    }
1528}
1529
1530/// Implementation for the `#[rite]` macro.
1531fn rite_impl(input_fn: LightFn, args: RiteArgs) -> TokenStream {
1532    // Multi-tier mode: generate suffixed variants for each tier
1533    if args.tier_tokens.len() > 1 {
1534        return rite_multi_tier_impl(input_fn, &args);
1535    }
1536
1537    // Single-tier or token-param mode
1538    rite_single_impl(input_fn, args)
1539}
1540
1541/// Generate a single `#[rite]` function (single tier or token-param mode).
1542fn rite_single_impl(mut input_fn: LightFn, args: RiteArgs) -> TokenStream {
1543    // Resolve features: either from tier name or from token parameter
1544    let TokenParamInfo {
1545        features,
1546        target_arch,
1547        token_type_name: _token_type_name,
1548        magetypes_namespace,
1549        ..
1550    } = if let Some(tier_token) = args.tier_tokens.first() {
1551        // Tier specified directly (e.g., #[rite(v3)]) — no token param needed
1552        let features = token_to_features(tier_token)
1553            .expect("tier_to_canonical_token returned invalid token name")
1554            .to_vec();
1555        let target_arch = token_to_arch(tier_token);
1556        let magetypes_namespace = token_to_magetypes_namespace(tier_token);
1557        TokenParamInfo {
1558            ident: Ident::new("_", proc_macro2::Span::call_site()),
1559            features,
1560            target_arch,
1561            token_type_name: Some(tier_token.clone()),
1562            magetypes_namespace,
1563        }
1564    } else {
1565        match find_token_param(&input_fn.sig) {
1566            Some(result) => result,
1567            None => {
1568                // Check for specific misuse: featureless traits like SimdToken
1569                if let Some(trait_name) = diagnose_featureless_token(&input_fn.sig) {
1570                    let msg = format!(
1571                        "`{trait_name}` cannot be used as a token bound in #[rite] \
1572                         because it doesn't specify any CPU features.\n\
1573                         \n\
1574                         #[rite] needs concrete features to generate #[target_feature]. \
1575                         Use a concrete token, a feature trait, or a tier name:\n\
1576                         \n\
1577                         Concrete tokens: X64V3Token, Desktop64, NeonToken, Arm64V2Token, ...\n\
1578                         Feature traits:  impl HasX64V2, impl HasNeon, impl HasArm64V3, ...\n\
1579                         Tier names:      #[rite(v3)], #[rite(neon)], #[rite(v4)], ..."
1580                    );
1581                    return syn::Error::new_spanned(&input_fn.sig, msg)
1582                        .to_compile_error()
1583                        .into();
1584                }
1585                let msg = "rite requires a token parameter or a tier name. Supported forms:\n\
1586                     - Tier name: `#[rite(v3)]`, `#[rite(neon)]`\n\
1587                     - Multi-tier: `#[rite(v3, v4, neon)]` (generates suffixed variants)\n\
1588                     - Concrete: `token: X64V3Token`\n\
1589                     - impl Trait: `token: impl HasX64V2`\n\
1590                     - Generic: `fn foo<T: HasX64V2>(token: T, ...)`";
1591                return syn::Error::new_spanned(&input_fn.sig, msg)
1592                    .to_compile_error()
1593                    .into();
1594            }
1595        }
1596    };
1597
1598    // Check: import_intrinsics with AVX-512 features requires the avx512 cargo feature.
1599    // Check resolved features (not token name) for uniform handling of concrete/trait/generic.
1600    #[cfg(not(feature = "avx512"))]
1601    if args.import_intrinsics && features.iter().any(|f| f.starts_with("avx512")) {
1602        let token_desc = _token_type_name.as_deref().unwrap_or("an AVX-512 token");
1603        let msg = format!(
1604            "Using {token_desc} with `import_intrinsics` requires the `avx512` feature.\n\
1605             \n\
1606             Add to your Cargo.toml:\n\
1607             \x20 archmage = {{ version = \"...\", features = [\"avx512\"] }}\n\
1608             \n\
1609             Without it, 512-bit safe memory ops (_mm512_loadu_ps etc.) are not available.\n\
1610             If you only need value intrinsics (no memory ops), remove `import_intrinsics`."
1611        );
1612        return syn::Error::new_spanned(&input_fn.sig, msg)
1613            .to_compile_error()
1614            .into();
1615    }
1616
1617    // Build target_feature attributes
1618    let target_feature_attrs: Vec<Attribute> = features
1619        .iter()
1620        .map(|feature| parse_quote!(#[target_feature(enable = #feature)]))
1621        .collect();
1622
1623    // Always use #[inline] - #[inline(always)] + #[target_feature] requires nightly
1624    let inline_attr: Attribute = parse_quote!(#[inline]);
1625
1626    // Prepend attributes to the function, filtering user #[inline] to avoid duplicates
1627    let mut new_attrs = target_feature_attrs;
1628    new_attrs.push(inline_attr);
1629    for attr in filter_inline_attrs(&input_fn.attrs) {
1630        new_attrs.push(attr.clone());
1631    }
1632    input_fn.attrs = new_attrs;
1633
1634    // Prepend import statements to body if requested
1635    let body_imports = generate_imports(
1636        target_arch,
1637        magetypes_namespace,
1638        args.import_intrinsics,
1639        args.import_magetypes,
1640    );
1641    if !body_imports.is_empty() {
1642        let original_body = &input_fn.body;
1643        input_fn.body = quote! {
1644            #body_imports
1645            #original_body
1646        };
1647    }
1648
1649    // If we know the target arch, generate cfg-gated impl (+ optional stub)
1650    if let Some(arch) = target_arch {
1651        let vis = &input_fn.vis;
1652        let sig = &input_fn.sig;
1653        let attrs = &input_fn.attrs;
1654        let body = &input_fn.body;
1655
1656        let stub = if args.stub {
1657            quote! {
1658                #[cfg(not(target_arch = #arch))]
1659                #vis #sig {
1660                    unreachable!(concat!(
1661                        "This function requires ",
1662                        #arch,
1663                        " architecture"
1664                    ))
1665                }
1666            }
1667        } else {
1668            quote! {}
1669        };
1670
1671        quote! {
1672            #[cfg(target_arch = #arch)]
1673            #(#attrs)*
1674            #vis #sig {
1675                #body
1676            }
1677
1678            #stub
1679        }
1680        .into()
1681    } else {
1682        // No specific arch (trait bounds) - just emit the annotated function
1683        quote!(#input_fn).into()
1684    }
1685}
1686
/// Generate multiple suffixed `#[rite]` variants for multi-tier mode.
///
/// `#[rite(v3, v4, neon)]` on `fn process(...)` generates:
/// - `fn process_v3(...)` with `#[target_feature(enable = "avx2,fma,...")]`
/// - `fn process_v4(...)` with `#[target_feature(enable = "avx512f,...")]`
/// - `fn process_neon(...)` with `#[target_feature(enable = "neon")]`
///
/// Each variant is cfg-gated to its architecture and gets `#[inline]`.
fn rite_multi_tier_impl(input_fn: LightFn, args: &RiteArgs) -> TokenStream {
    let fn_name = &input_fn.sig.ident;
    // Accumulates every generated variant (and optional stub) in tier order.
    let mut variants = proc_macro2::TokenStream::new();

    for tier_token in &args.tier_tokens {
        // Feature list for this tier; unknown tokens become a compile error.
        let features = match token_to_features(tier_token) {
            Some(f) => f,
            None => {
                return syn::Error::new_spanned(
                    &input_fn.sig,
                    format!("unknown token `{tier_token}` in multi-tier #[rite]"),
                )
                .to_compile_error()
                .into();
            }
        };
        let target_arch = token_to_arch(tier_token);
        let magetypes_namespace = token_to_magetypes_namespace(tier_token);

        // Check: import_intrinsics with AVX-512 features requires the avx512 cargo feature.
        #[cfg(not(feature = "avx512"))]
        if args.import_intrinsics && features.iter().any(|f| f.starts_with("avx512")) {
            let msg = format!(
                "Using {tier_token} with `import_intrinsics` requires the `avx512` feature.\n\
                 \n\
                 Add to your Cargo.toml:\n\
                 \x20 archmage = {{ version = \"...\", features = [\"avx512\"] }}\n\
                 \n\
                 Without it, 512-bit safe memory ops (_mm512_loadu_ps etc.) are not available.\n\
                 If you only need value intrinsics (no memory ops), remove `import_intrinsics`."
            );
            return syn::Error::new_spanned(&input_fn.sig, msg)
                .to_compile_error()
                .into();
        }

        // Canonical tokens always map to a suffix; failure is an internal bug.
        let suffix = canonical_token_to_tier_suffix(tier_token)
            .expect("canonical token must have a tier suffix");

        // Build suffixed function name
        let suffixed_ident = format_ident!("{}_{}", fn_name, suffix);

        // Clone and rename the function
        let mut variant_fn = input_fn.clone();
        variant_fn.sig.ident = suffixed_ident;

        // Build target_feature attributes
        let target_feature_attrs: Vec<Attribute> = features
            .iter()
            .map(|feature| parse_quote!(#[target_feature(enable = #feature)]))
            .collect();
        // Plain #[inline]: #[inline(always)] + #[target_feature] needs nightly.
        let inline_attr: Attribute = parse_quote!(#[inline]);

        // Prepend generated attributes; user #[inline] is filtered to avoid duplicates.
        let mut new_attrs = target_feature_attrs;
        new_attrs.push(inline_attr);
        for attr in filter_inline_attrs(&variant_fn.attrs) {
            new_attrs.push(attr.clone());
        }
        variant_fn.attrs = new_attrs;

        // Prepend import statements if requested
        let body_imports = generate_imports(
            target_arch,
            magetypes_namespace,
            args.import_intrinsics,
            args.import_magetypes,
        );
        if !body_imports.is_empty() {
            let original_body = &variant_fn.body;
            variant_fn.body = quote! {
                #body_imports
                #original_body
            };
        }

        // Emit cfg-gated variant
        if let Some(arch) = target_arch {
            let vis = &variant_fn.vis;
            let sig = &variant_fn.sig;
            let attrs = &variant_fn.attrs;
            let body = &variant_fn.body;

            variants.extend(quote! {
                #[cfg(target_arch = #arch)]
                #(#attrs)*
                #vis #sig {
                    #body
                }
            });

            // Optional panicking stub so call sites compile on other arches.
            if args.stub {
                variants.extend(quote! {
                    #[cfg(not(target_arch = #arch))]
                    #vis #sig {
                        unreachable!(concat!(
                            "This function requires ",
                            #arch,
                            " architecture"
                        ))
                    }
                });
            }
        } else {
            // No specific arch — just emit the annotated function
            variants.extend(quote!(#variant_fn));
        }
    }

    variants.into()
}
1805
1806// =============================================================================
1807// magetypes! macro - generate platform variants from generic function
1808// =============================================================================
1809
1810/// Generate platform-specific variants from a function by replacing `Token`.
1811///
1812/// Use `Token` as a placeholder for the token type. The macro generates
1813/// suffixed variants with `Token` replaced by the concrete token type, and
1814/// each variant wrapped in the appropriate `#[cfg(target_arch = ...)]` guard.
1815///
1816/// # Default tiers
1817///
1818/// Without arguments, generates `_v3`, `_v4`, `_neon`, `_wasm128`, `_scalar`:
1819///
1820/// ```rust,ignore
1821/// #[magetypes]
1822/// fn process(token: Token, data: &[f32]) -> f32 {
1823///     inner_simd_work(token, data)
1824/// }
1825/// ```
1826///
1827/// # Explicit tiers
1828///
1829/// Specify which tiers to generate:
1830///
1831/// ```rust,ignore
1832/// #[magetypes(v1, v3, neon)]
1833/// fn process(token: Token, data: &[f32]) -> f32 {
1834///     inner_simd_work(token, data)
1835/// }
1836/// // Generates: process_v1, process_v3, process_neon, process_scalar
1837/// ```
1838///
1839/// `scalar` is always included implicitly.
1840///
1841/// Known tiers: `v1`, `v2`, `v3`, `v4`, `v4x`, `neon`, `neon_aes`,
1842/// `neon_sha3`, `neon_crc`, `wasm128`, `wasm128_relaxed`, `scalar`.
1843///
1844/// # What gets replaced
1845///
1846/// **Only `Token`** is replaced — with the concrete token type for each variant
1847/// (e.g., `archmage::X64V3Token`, `archmage::ScalarToken`). SIMD types like
1848/// `f32x8` and constants like `LANES` are **not** replaced by this macro.
1849///
1850/// # Usage with incant!
1851///
1852/// The generated variants work with `incant!` for dispatch:
1853///
1854/// ```rust,ignore
1855/// pub fn process_api(data: &[f32]) -> f32 {
1856///     incant!(process(data))
1857/// }
1858///
1859/// // Or with matching explicit tiers:
1860/// pub fn process_api(data: &[f32]) -> f32 {
1861///     incant!(process(data), [v1, v3, neon, scalar])
1862/// }
1863/// ```
1864#[proc_macro_attribute]
1865pub fn magetypes(attr: TokenStream, item: TokenStream) -> TokenStream {
1866    let input_fn = parse_macro_input!(item as LightFn);
1867
1868    // Parse optional tier list from attribute args
1869    let tier_names: Vec<String> = if attr.is_empty() {
1870        DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect()
1871    } else {
1872        let parser = |input: ParseStream| input.parse_terminated(Ident::parse, Token![,]);
1873        let idents = match syn::parse::Parser::parse(parser, attr) {
1874            Ok(p) => p,
1875            Err(e) => return e.to_compile_error().into(),
1876        };
1877        idents.iter().map(|i| i.to_string()).collect()
1878    };
1879
1880    let tiers = match resolve_tiers(&tier_names, input_fn.sig.ident.span()) {
1881        Ok(t) => t,
1882        Err(e) => return e.to_compile_error().into(),
1883    };
1884
1885    magetypes_impl(input_fn, &tiers)
1886}
1887
1888fn magetypes_impl(mut input_fn: LightFn, tiers: &[&TierDescriptor]) -> TokenStream {
1889    // Strip user-provided #[arcane] / #[rite] to prevent double-wrapping
1890    // (magetypes auto-adds #[arcane] on non-scalar variants)
1891    input_fn
1892        .attrs
1893        .retain(|attr| !attr.path().is_ident("arcane") && !attr.path().is_ident("rite"));
1894
1895    let fn_name = &input_fn.sig.ident;
1896    let fn_attrs = &input_fn.attrs;
1897
1898    // Convert function to string for text substitution
1899    let fn_str = input_fn.to_token_stream().to_string();
1900
1901    let mut variants = Vec::new();
1902
1903    for tier in tiers {
1904        // Create suffixed function name
1905        let suffixed_name = format!("{}_{}", fn_name, tier.suffix);
1906
1907        // Do text substitution
1908        let mut variant_str = fn_str.clone();
1909
1910        // Replace function name
1911        variant_str = variant_str.replacen(&fn_name.to_string(), &suffixed_name, 1);
1912
1913        // Replace Token type with concrete token
1914        variant_str = variant_str.replace("Token", tier.token_path);
1915
1916        // Parse back to tokens
1917        let variant_tokens: proc_macro2::TokenStream = match variant_str.parse() {
1918            Ok(t) => t,
1919            Err(e) => {
1920                return syn::Error::new_spanned(
1921                    &input_fn,
1922                    format!(
1923                        "Failed to parse generated variant `{}`: {}",
1924                        suffixed_name, e
1925                    ),
1926                )
1927                .to_compile_error()
1928                .into();
1929            }
1930        };
1931
1932        // Add cfg guard (arch only — no cargo feature checks in output)
1933        let cfg_guard = match tier.target_arch {
1934            Some(arch) => quote! { #[cfg(target_arch = #arch)] },
1935            None => quote! {},
1936        };
1937
1938        variants.push(if tier.name != "scalar" {
1939            // Non-scalar variants get #[arcane] so target_feature is applied
1940            quote! {
1941                #cfg_guard
1942                #[archmage::arcane]
1943                #variant_tokens
1944            }
1945        } else {
1946            quote! {
1947                #cfg_guard
1948                #variant_tokens
1949            }
1950        });
1951    }
1952
1953    // Remove attributes from the list that should not be duplicated
1954    let filtered_attrs: Vec<_> = fn_attrs
1955        .iter()
1956        .filter(|a| !a.path().is_ident("magetypes"))
1957        .collect();
1958
1959    let output = quote! {
1960        #(#filtered_attrs)*
1961        #(#variants)*
1962    };
1963
1964    output.into()
1965}
1966
1967// =============================================================================
1968// incant! macro - dispatch to platform-specific variants
1969// =============================================================================
1970
1971// =============================================================================
1972// Tier descriptors for incant! and #[magetypes]
1973// =============================================================================
1974
/// Describes a dispatch tier for incant! and #[magetypes].
///
/// All known entries live in [`ALL_TIERS`]; `resolve_tiers` selects and
/// sorts them by descending `priority` to produce the dispatch order.
struct TierDescriptor {
    /// Tier name as written in user code (e.g., "v3", "neon")
    name: &'static str,
    /// Function suffix appended to generated/dispatched names (e.g., "v3", "neon", "scalar")
    suffix: &'static str,
    /// Fully-qualified token type path (e.g., "archmage::X64V3Token")
    token_path: &'static str,
    /// IntoConcreteToken method name used by passthrough dispatch (e.g., "as_x64v3")
    as_method: &'static str,
    /// Target architecture for the #[cfg(target_arch = ...)] guard (None = no guard)
    target_arch: Option<&'static str>,
    /// Dispatch priority (higher = tried first within same arch)
    priority: u32,
}
1990
1991/// All known tiers in dispatch-priority order (highest first within arch).
1992const ALL_TIERS: &[TierDescriptor] = &[
1993    // x86: highest to lowest
1994    TierDescriptor {
1995        name: "v4x",
1996        suffix: "v4x",
1997        token_path: "archmage::X64V4xToken",
1998        as_method: "as_x64v4x",
1999        target_arch: Some("x86_64"),
2000
2001        priority: 50,
2002    },
2003    TierDescriptor {
2004        name: "v4",
2005        suffix: "v4",
2006        token_path: "archmage::X64V4Token",
2007        as_method: "as_x64v4",
2008        target_arch: Some("x86_64"),
2009
2010        priority: 40,
2011    },
2012    TierDescriptor {
2013        name: "v3_crypto",
2014        suffix: "v3_crypto",
2015        token_path: "archmage::X64V3CryptoToken",
2016        as_method: "as_x64v3_crypto",
2017        target_arch: Some("x86_64"),
2018
2019        priority: 35,
2020    },
2021    TierDescriptor {
2022        name: "v3",
2023        suffix: "v3",
2024        token_path: "archmage::X64V3Token",
2025        as_method: "as_x64v3",
2026        target_arch: Some("x86_64"),
2027
2028        priority: 30,
2029    },
2030    TierDescriptor {
2031        name: "x64_crypto",
2032        suffix: "x64_crypto",
2033        token_path: "archmage::X64CryptoToken",
2034        as_method: "as_x64_crypto",
2035        target_arch: Some("x86_64"),
2036
2037        priority: 25,
2038    },
2039    TierDescriptor {
2040        name: "v2",
2041        suffix: "v2",
2042        token_path: "archmage::X64V2Token",
2043        as_method: "as_x64v2",
2044        target_arch: Some("x86_64"),
2045
2046        priority: 20,
2047    },
2048    TierDescriptor {
2049        name: "v1",
2050        suffix: "v1",
2051        token_path: "archmage::X64V1Token",
2052        as_method: "as_x64v1",
2053        target_arch: Some("x86_64"),
2054
2055        priority: 10,
2056    },
2057    // ARM: highest to lowest
2058    TierDescriptor {
2059        name: "arm_v3",
2060        suffix: "arm_v3",
2061        token_path: "archmage::Arm64V3Token",
2062        as_method: "as_arm_v3",
2063        target_arch: Some("aarch64"),
2064
2065        priority: 50,
2066    },
2067    TierDescriptor {
2068        name: "arm_v2",
2069        suffix: "arm_v2",
2070        token_path: "archmage::Arm64V2Token",
2071        as_method: "as_arm_v2",
2072        target_arch: Some("aarch64"),
2073
2074        priority: 40,
2075    },
2076    TierDescriptor {
2077        name: "neon_aes",
2078        suffix: "neon_aes",
2079        token_path: "archmage::NeonAesToken",
2080        as_method: "as_neon_aes",
2081        target_arch: Some("aarch64"),
2082
2083        priority: 30,
2084    },
2085    TierDescriptor {
2086        name: "neon_sha3",
2087        suffix: "neon_sha3",
2088        token_path: "archmage::NeonSha3Token",
2089        as_method: "as_neon_sha3",
2090        target_arch: Some("aarch64"),
2091
2092        priority: 30,
2093    },
2094    TierDescriptor {
2095        name: "neon_crc",
2096        suffix: "neon_crc",
2097        token_path: "archmage::NeonCrcToken",
2098        as_method: "as_neon_crc",
2099        target_arch: Some("aarch64"),
2100
2101        priority: 30,
2102    },
2103    TierDescriptor {
2104        name: "neon",
2105        suffix: "neon",
2106        token_path: "archmage::NeonToken",
2107        as_method: "as_neon",
2108        target_arch: Some("aarch64"),
2109
2110        priority: 20,
2111    },
2112    // WASM
2113    TierDescriptor {
2114        name: "wasm128_relaxed",
2115        suffix: "wasm128_relaxed",
2116        token_path: "archmage::Wasm128RelaxedToken",
2117        as_method: "as_wasm128_relaxed",
2118        target_arch: Some("wasm32"),
2119
2120        priority: 21,
2121    },
2122    TierDescriptor {
2123        name: "wasm128",
2124        suffix: "wasm128",
2125        token_path: "archmage::Wasm128Token",
2126        as_method: "as_wasm128",
2127        target_arch: Some("wasm32"),
2128
2129        priority: 20,
2130    },
2131    // Scalar (always last)
2132    TierDescriptor {
2133        name: "scalar",
2134        suffix: "scalar",
2135        token_path: "archmage::ScalarToken",
2136        as_method: "as_scalar",
2137        target_arch: None,
2138
2139        priority: 0,
2140    },
2141];
2142
/// Default tiers for `incant!` and `#[magetypes]`.
///
/// Without the `avx512` feature, v4/v4x are excluded from defaults because most
/// users won't have written `_v4` functions. With avx512, v4 is included since
/// safe 512-bit memory ops are available for `import_intrinsics`.
#[cfg(feature = "avx512")]
const DEFAULT_TIER_NAMES: &[&str] = &["v4", "v3", "neon", "wasm128", "scalar"];
#[cfg(not(feature = "avx512"))]
const DEFAULT_TIER_NAMES: &[&str] = &["v3", "neon", "wasm128", "scalar"];

/// Default tiers for `#[autoversion]`. Always includes v4 because autoversion
/// generates scalar code compiled with `#[target_feature]` — no safe memory ops
/// needed, no `import_intrinsics`, so the `avx512` feature is irrelevant.
const AUTOVERSION_DEFAULT_TIER_NAMES: &[&str] = &["v4", "v3", "neon", "wasm128", "scalar"];

/// Whether `incant!` requires `scalar` in explicit tier lists.
/// Currently false for backwards compatibility. Flip to true in v1.0.
/// Checked in `incant_impl`; when true, an explicit tier list that omits
/// `scalar` becomes a compile error.
const REQUIRE_EXPLICIT_SCALAR: bool = false;
2161
/// Look up a tier descriptor by user-facing name.
///
/// Returns `None` for unknown names; callers such as `resolve_tiers`
/// convert that into a compile error listing the known tiers.
fn find_tier(name: &str) -> Option<&'static TierDescriptor> {
    ALL_TIERS.iter().find(|t| t.name == name)
}
2166
2167/// Resolve tier names to descriptors, sorted by dispatch priority (highest first).
2168/// Always appends "scalar" if not already present.
2169fn resolve_tiers(
2170    tier_names: &[String],
2171    error_span: proc_macro2::Span,
2172) -> syn::Result<Vec<&'static TierDescriptor>> {
2173    let mut tiers = Vec::new();
2174    for name in tier_names {
2175        match find_tier(name) {
2176            Some(tier) => tiers.push(tier),
2177            None => {
2178                let known: Vec<&str> = ALL_TIERS.iter().map(|t| t.name).collect();
2179                return Err(syn::Error::new(
2180                    error_span,
2181                    format!("unknown tier `{}`. Known tiers: {}", name, known.join(", ")),
2182                ));
2183            }
2184        }
2185    }
2186
2187    // Always include scalar fallback
2188    if !tiers.iter().any(|t| t.name == "scalar") {
2189        tiers.push(find_tier("scalar").unwrap());
2190    }
2191
2192    // Sort by priority (highest first) for correct dispatch order
2193    tiers.sort_by(|a, b| b.priority.cmp(&a.priority));
2194
2195    Ok(tiers)
2196}
2197
2198// =============================================================================
2199// incant! macro - dispatch to platform-specific variants
2200// =============================================================================
2201
/// Input for the incant! macro
///
/// Parsed from: `func_path(arg, ...) [with token_expr] [, [tier1, tier2, ...]]`
struct IncantInput {
    /// Function path to call (e.g. `func` or `module::func`)
    func_path: syn::Path,
    /// Arguments to pass
    args: Vec<syn::Expr>,
    /// Optional token variable for passthrough mode (`with token`)
    with_token: Option<syn::Expr>,
    /// Optional explicit tier list plus its bracket span for error
    /// reporting (None = default tiers)
    tiers: Option<(Vec<String>, proc_macro2::Span)>,
}
2213
2214/// Create a suffixed version of a function path.
2215/// e.g. `module::func` + `"v3"` → `module::func_v3`
2216fn suffix_path(path: &syn::Path, suffix: &str) -> syn::Path {
2217    let mut suffixed = path.clone();
2218    if let Some(last) = suffixed.segments.last_mut() {
2219        last.ident = format_ident!("{}_{}", last.ident, suffix);
2220    }
2221    suffixed
2222}
2223
impl Parse for IncantInput {
    fn parse(input: ParseStream) -> syn::Result<Self> {
        // Parse: function_path(arg1, arg2, ...) [with token_expr] [, [tier1, tier2, ...]]
        let func_path: syn::Path = input.parse()?;

        // Parse parenthesized arguments
        let content;
        syn::parenthesized!(content in input);
        let args = content
            .parse_terminated(syn::Expr::parse, Token![,])?
            .into_iter()
            .collect();

        // Check for optional "with token".
        // Any bare identifier after the call must be `with`; anything else is
        // rejected rather than silently ignored.
        let with_token = if input.peek(Ident) {
            let kw: Ident = input.parse()?;
            if kw != "with" {
                return Err(syn::Error::new_spanned(kw, "expected `with` keyword"));
            }
            Some(input.parse()?)
        } else {
            None
        };

        // Check for optional tier list: , [tier1, tier2, ...]
        let tiers = if input.peek(Token![,]) {
            let _: Token![,] = input.parse()?;
            let bracket_content;
            let bracket = syn::bracketed!(bracket_content in input);
            let tier_idents = bracket_content.parse_terminated(Ident::parse, Token![,])?;
            let tier_names: Vec<String> = tier_idents.iter().map(|i| i.to_string()).collect();
            // Keep the bracket span so tier errors point at the list itself.
            Some((tier_names, bracket.span.join()))
        } else {
            None
        };

        Ok(IncantInput {
            func_path,
            args,
            with_token,
            tiers,
        })
    }
}
2268
2269/// Dispatch to platform-specific SIMD variants.
2270///
2271/// # Entry Point Mode (no token yet)
2272///
2273/// Summons tokens and dispatches to the best available variant:
2274///
2275/// ```rust,ignore
2276/// pub fn public_api(data: &[f32]) -> f32 {
2277///     incant!(dot(data))
2278/// }
2279/// ```
2280///
2281/// Expands to runtime feature detection + dispatch to `dot_v3`, `dot_v4`,
2282/// `dot_neon`, `dot_wasm128`, or `dot_scalar`.
2283///
2284/// # Explicit Tiers
2285///
2286/// Specify which tiers to dispatch to:
2287///
2288/// ```rust,ignore
2289/// // Only dispatch to v1, v3, neon, and scalar
2290/// pub fn api(data: &[f32]) -> f32 {
2291///     incant!(process(data), [v1, v3, neon, scalar])
2292/// }
2293/// ```
2294///
2295/// Always include `scalar` in explicit tier lists — `incant!` always
2296/// emits a `fn_scalar()` call as the final fallback, and listing it
2297/// documents this dependency. Currently auto-appended if omitted;
2298/// will become a compile error in v1.0. Unknown tier names cause a
2299/// compile error. Tiers are automatically sorted into correct
2300/// dispatch order (highest priority first).
2301///
2302/// Known tiers: `v1`, `v2`, `v3`, `v4`, `v4x`, `neon`, `neon_aes`,
2303/// `neon_sha3`, `neon_crc`, `wasm128`, `wasm128_relaxed`, `scalar`.
2304///
2305/// # Passthrough Mode (already have token)
2306///
2307/// Uses compile-time dispatch via `IntoConcreteToken`:
2308///
2309/// ```rust,ignore
2310/// #[arcane]
2311/// fn outer(token: X64V3Token, data: &[f32]) -> f32 {
2312///     incant!(inner(data) with token)
2313/// }
2314/// ```
2315///
2316/// Also supports explicit tiers:
2317///
2318/// ```rust,ignore
2319/// fn inner<T: IntoConcreteToken>(token: T, data: &[f32]) -> f32 {
2320///     incant!(process(data) with token, [v3, neon, scalar])
2321/// }
2322/// ```
2323///
2324/// The compiler monomorphizes the dispatch, eliminating non-matching branches.
2325///
2326/// # Variant Naming
2327///
2328/// Functions must have suffixed variants matching the selected tiers:
2329/// - `_v1` for `X64V1Token`
2330/// - `_v2` for `X64V2Token`
2331/// - `_v3` for `X64V3Token`
2332/// - `_v4` for `X64V4Token` (requires `avx512` feature)
2333/// - `_v4x` for `X64V4xToken` (requires `avx512` feature)
2334/// - `_neon` for `NeonToken`
2335/// - `_neon_aes` for `NeonAesToken`
2336/// - `_neon_sha3` for `NeonSha3Token`
2337/// - `_neon_crc` for `NeonCrcToken`
2338/// - `_wasm128` for `Wasm128Token`
2339/// - `_scalar` for `ScalarToken`
2340#[proc_macro]
2341pub fn incant(input: TokenStream) -> TokenStream {
2342    let input = parse_macro_input!(input as IncantInput);
2343    incant_impl(input)
2344}
2345
2346/// Legacy alias for [`incant!`].
2347#[proc_macro]
2348pub fn simd_route(input: TokenStream) -> TokenStream {
2349    let input = parse_macro_input!(input as IncantInput);
2350    incant_impl(input)
2351}
2352
2353/// Descriptive alias for [`incant!`].
2354///
2355/// Dispatches to architecture-specific function variants at runtime.
2356/// Looks for suffixed functions (`_v3`, `_v4`, `_neon`, `_wasm128`, `_scalar`)
2357/// and calls the best one the CPU supports.
2358///
2359/// See [`incant!`] for full documentation and examples.
2360#[proc_macro]
2361pub fn dispatch_variant(input: TokenStream) -> TokenStream {
2362    let input = parse_macro_input!(input as IncantInput);
2363    incant_impl(input)
2364}
2365
/// Shared implementation behind `incant!` / `simd_route!` / `dispatch_variant!`.
///
/// Resolves the tier list (explicit bracket list or defaults), then generates
/// either passthrough dispatch (`with token`) or entry-point dispatch.
fn incant_impl(input: IncantInput) -> TokenStream {
    let func_path = &input.func_path;
    let args = &input.args;

    // Resolve tiers
    let tier_names: Vec<String> = match &input.tiers {
        Some((names, _)) => names.clone(),
        None => DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect(),
    };
    // Errors point at the bracketed tier list when one was written,
    // otherwise at the name of the function being dispatched.
    let last_segment_span = func_path
        .segments
        .last()
        .map(|s| s.ident.span())
        .unwrap_or_else(proc_macro2::Span::call_site);
    let error_span = input
        .tiers
        .as_ref()
        .map(|(_, span)| *span)
        .unwrap_or(last_segment_span);

    // When the user specifies explicit tiers, require `scalar` in the list.
    // This forces acknowledgment that a scalar fallback path exists and must
    // be implemented. Default tiers (no bracket list) always include scalar.
    // TODO(v1.0): flip REQUIRE_EXPLICIT_SCALAR to true
    if REQUIRE_EXPLICIT_SCALAR
        && let Some((names, span)) = &input.tiers
        && !names.iter().any(|n| n == "scalar")
    {
        return syn::Error::new(
            *span,
            "explicit tier list must include `scalar`. \
             incant! always dispatches to fn_scalar() as the final fallback, \
             so `scalar` must appear in the tier list to acknowledge this. \
             Example: [v3, neon, scalar]",
        )
        .to_compile_error()
        .into();
    }

    let tiers = match resolve_tiers(&tier_names, error_span) {
        Ok(t) => t,
        Err(e) => return e.to_compile_error().into(),
    };

    // Group tiers by architecture for cfg-guarded blocks
    // Within each arch, tiers are already sorted by priority (highest first)
    if let Some(token_expr) = &input.with_token {
        gen_incant_passthrough(func_path, args, token_expr, &tiers)
    } else {
        gen_incant_entry(func_path, args, &tiers)
    }
}
2418
2419/// Generate incant! passthrough mode (already have a token).
2420fn gen_incant_passthrough(
2421    func_path: &syn::Path,
2422    args: &[syn::Expr],
2423    token_expr: &syn::Expr,
2424    tiers: &[&TierDescriptor],
2425) -> TokenStream {
2426    let mut dispatch_arms = Vec::new();
2427
2428    // Group non-scalar tiers by target_arch for cfg blocks
2429    let mut arch_groups: Vec<(Option<&str>, Vec<&TierDescriptor>)> = Vec::new();
2430    for tier in tiers {
2431        if tier.name == "scalar" {
2432            continue; // Handle scalar separately at the end
2433        }
2434        if let Some(group) = arch_groups.iter_mut().find(|(a, _)| *a == tier.target_arch) {
2435            group.1.push(tier);
2436        } else {
2437            arch_groups.push((tier.target_arch, vec![tier]));
2438        }
2439    }
2440
2441    for (target_arch, group_tiers) in &arch_groups {
2442        let mut tier_checks = Vec::new();
2443        for tier in group_tiers {
2444            let fn_suffixed = suffix_path(func_path, tier.suffix);
2445            let as_method = format_ident!("{}", tier.as_method);
2446            tier_checks.push(quote! {
2447                if let Some(__t) = __incant_token.#as_method() {
2448                    break '__incant #fn_suffixed(__t, #(#args),*);
2449                }
2450            });
2451        }
2452
2453        let inner = quote! { #(#tier_checks)* };
2454
2455        if let Some(arch) = target_arch {
2456            dispatch_arms.push(quote! {
2457                #[cfg(target_arch = #arch)]
2458                { #inner }
2459            });
2460        } else {
2461            dispatch_arms.push(inner);
2462        }
2463    }
2464
2465    // Scalar fallback (always last)
2466    let fn_scalar = suffix_path(func_path, "scalar");
2467    let scalar_arm = if tiers.iter().any(|t| t.name == "scalar") {
2468        quote! {
2469            if let Some(__t) = __incant_token.as_scalar() {
2470                break '__incant #fn_scalar(__t, #(#args),*);
2471            }
2472            unreachable!("Token did not match any known variant")
2473        }
2474    } else {
2475        quote! { unreachable!("Token did not match any known variant") }
2476    };
2477
2478    let expanded = quote! {
2479        '__incant: {
2480            use archmage::IntoConcreteToken;
2481            let __incant_token = #token_expr;
2482            #(#dispatch_arms)*
2483            #scalar_arm
2484        }
2485    };
2486    expanded.into()
2487}
2488
2489/// Generate incant! entry point mode (summon tokens).
2490fn gen_incant_entry(
2491    func_path: &syn::Path,
2492    args: &[syn::Expr],
2493    tiers: &[&TierDescriptor],
2494) -> TokenStream {
2495    let mut dispatch_arms = Vec::new();
2496
2497    // Group non-scalar tiers by target_arch for cfg blocks.
2498    let mut arch_groups: Vec<(Option<&str>, Vec<&TierDescriptor>)> = Vec::new();
2499    for tier in tiers {
2500        if tier.name == "scalar" {
2501            continue;
2502        }
2503        if let Some(group) = arch_groups.iter_mut().find(|(a, _)| *a == tier.target_arch) {
2504            group.1.push(tier);
2505        } else {
2506            arch_groups.push((tier.target_arch, vec![tier]));
2507        }
2508    }
2509
2510    for (target_arch, group_tiers) in &arch_groups {
2511        let mut tier_checks = Vec::new();
2512        for tier in group_tiers {
2513            let fn_suffixed = suffix_path(func_path, tier.suffix);
2514            let token_path: syn::Path = syn::parse_str(tier.token_path).unwrap();
2515
2516            tier_checks.push(quote! {
2517                if let Some(__t) = #token_path::summon() {
2518                    break '__incant #fn_suffixed(__t, #(#args),*);
2519                }
2520            });
2521        }
2522
2523        let inner = quote! { #(#tier_checks)* };
2524
2525        if let Some(arch) = target_arch {
2526            dispatch_arms.push(quote! {
2527                #[cfg(target_arch = #arch)]
2528                { #inner }
2529            });
2530        } else {
2531            dispatch_arms.push(inner);
2532        }
2533    }
2534
2535    // Scalar fallback
2536    let fn_scalar = suffix_path(func_path, "scalar");
2537
2538    let expanded = quote! {
2539        '__incant: {
2540            use archmage::SimdToken;
2541            #(#dispatch_arms)*
2542            #fn_scalar(archmage::ScalarToken, #(#args),*)
2543        }
2544    };
2545    expanded.into()
2546}
2547
2548// =============================================================================
2549// autoversion - combined variant generation + dispatch
2550// =============================================================================
2551
/// Arguments to the `#[autoversion]` macro.
///
/// Parsed from the attribute's argument list, e.g. `#[autoversion(v3, neon)]`
/// or `#[autoversion(_self = MyType)]`. Tier names are not validated here —
/// that happens later in `resolve_tiers`.
struct AutoversionArgs {
    /// The concrete type to use for `self` receiver (inherent methods only).
    /// When set, non-scalar variants get `#[arcane(_self = Type)]` and the
    /// scalar variant gets a `let _self = self;` preamble.
    self_type: Option<Type>,
    /// Explicit tier names (None = default tiers).
    tiers: Option<Vec<String>>,
}
2559
2560impl Parse for AutoversionArgs {
2561    fn parse(input: ParseStream) -> syn::Result<Self> {
2562        let mut self_type = None;
2563        let mut tier_names = Vec::new();
2564
2565        while !input.is_empty() {
2566            let ident: Ident = input.parse()?;
2567            if ident == "_self" {
2568                let _: Token![=] = input.parse()?;
2569                self_type = Some(input.parse()?);
2570            } else {
2571                // Treat as tier name — validated later by resolve_tiers
2572                tier_names.push(ident.to_string());
2573            }
2574            if input.peek(Token![,]) {
2575                let _: Token![,] = input.parse()?;
2576            }
2577        }
2578
2579        Ok(AutoversionArgs {
2580            self_type,
2581            tiers: if tier_names.is_empty() {
2582                None
2583            } else {
2584                Some(tier_names)
2585            },
2586        })
2587    }
2588}
2589
/// Information about the `SimdToken` parameter found in a function signature.
///
/// Produced by `find_simd_token_param`; `autoversion_impl` uses `index` to
/// retype the parameter per variant and to remove it from the dispatcher.
struct SimdTokenParamInfo {
    /// Index of the parameter in `sig.inputs`
    index: usize,
    /// The parameter identifier (synthesized as `__autoversion_token` when
    /// the user wrote `_`). Currently unused, hence the allow.
    #[allow(dead_code)]
    ident: Ident,
}
2598
2599/// Find the `SimdToken` parameter in a function signature.
2600///
2601/// Searches all typed parameters for one whose type path ends in `SimdToken`.
2602/// Returns the parameter index and identifier, or `None` if not found.
2603fn find_simd_token_param(sig: &Signature) -> Option<SimdTokenParamInfo> {
2604    for (i, arg) in sig.inputs.iter().enumerate() {
2605        if let FnArg::Typed(PatType { pat, ty, .. }) = arg
2606            && let Type::Path(type_path) = ty.as_ref()
2607            && let Some(seg) = type_path.path.segments.last()
2608            && seg.ident == "SimdToken"
2609        {
2610            let ident = match pat.as_ref() {
2611                syn::Pat::Ident(pi) => pi.ident.clone(),
2612                syn::Pat::Wild(w) => Ident::new("__autoversion_token", w.underscore_token.span),
2613                _ => continue,
2614            };
2615            return Some(SimdTokenParamInfo { index: i, ident });
2616        }
2617    }
2618    None
2619}
2620
2621/// Core implementation for `#[autoversion]`.
2622///
2623/// Generates suffixed SIMD variants (like `#[magetypes]`) and a runtime
2624/// dispatcher function (like `incant!`) from a single annotated function.
2625fn autoversion_impl(mut input_fn: LightFn, args: AutoversionArgs) -> TokenStream {
2626    // Check for self receiver
2627    let has_self = input_fn
2628        .sig
2629        .inputs
2630        .first()
2631        .is_some_and(|arg| matches!(arg, FnArg::Receiver(_)));
2632
2633    // _self = Type is only needed for trait impls (nested mode in #[arcane]).
2634    // For inherent methods, self/Self work naturally in sibling mode.
2635
2636    // Find SimdToken parameter
2637    let token_param = match find_simd_token_param(&input_fn.sig) {
2638        Some(p) => p,
2639        None => {
2640            return syn::Error::new_spanned(
2641                &input_fn.sig,
2642                "autoversion requires a `SimdToken` parameter.\n\
2643                 Example: fn process(token: SimdToken, data: &[f32]) -> f32 { ... }\n\n\
2644                 SimdToken is the dispatch placeholder — autoversion replaces it \
2645                 with concrete token types and generates a runtime dispatcher.",
2646            )
2647            .to_compile_error()
2648            .into();
2649        }
2650    };
2651
2652    // Resolve tiers — autoversion always includes v4 in its defaults because it
2653    // generates scalar code compiled with #[target_feature], not import_intrinsics.
2654    let tier_names: Vec<String> = match &args.tiers {
2655        Some(names) => names.clone(),
2656        None => AUTOVERSION_DEFAULT_TIER_NAMES
2657            .iter()
2658            .map(|s| s.to_string())
2659            .collect(),
2660    };
2661    let tiers = match resolve_tiers(&tier_names, input_fn.sig.ident.span()) {
2662        Ok(t) => t,
2663        Err(e) => return e.to_compile_error().into(),
2664    };
2665
2666    // Strip #[arcane] / #[rite] to prevent double-wrapping
2667    input_fn
2668        .attrs
2669        .retain(|attr| !attr.path().is_ident("arcane") && !attr.path().is_ident("rite"));
2670
2671    let fn_name = &input_fn.sig.ident;
2672    let vis = input_fn.vis.clone();
2673
2674    // Move attrs to dispatcher only; variants get no user attrs
2675    let fn_attrs: Vec<Attribute> = input_fn.attrs.drain(..).collect();
2676
2677    // =========================================================================
2678    // Generate suffixed variants
2679    // =========================================================================
2680    //
2681    // AST manipulation only — we clone the parsed LightFn and swap the token
2682    // param's type annotation. No serialize/reparse round-trip. The body is
2683    // never touched unless _self = Type requires a `let _self = self;`
2684    // preamble on the scalar variant.
2685
2686    let mut variants = Vec::new();
2687
2688    for tier in &tiers {
2689        let mut variant_fn = input_fn.clone();
2690
2691        // Variants are always private — only the dispatcher is public.
2692        variant_fn.vis = syn::Visibility::Inherited;
2693
2694        // Rename: process → process_v3
2695        variant_fn.sig.ident = format_ident!("{}_{}", fn_name, tier.suffix);
2696
2697        // Replace SimdToken param type with concrete token type
2698        let concrete_type: Type = syn::parse_str(tier.token_path).unwrap();
2699        if let FnArg::Typed(pt) = &mut variant_fn.sig.inputs[token_param.index] {
2700            *pt.ty = concrete_type;
2701        }
2702
2703        // Scalar with _self = Type: inject `let _self = self;` preamble so body's
2704        // _self references resolve (non-scalar variants get this from #[arcane(_self = Type)])
2705        if tier.name == "scalar" && has_self && args.self_type.is_some() {
2706            let original_body = variant_fn.body.clone();
2707            variant_fn.body = quote!(let _self = self; #original_body);
2708        }
2709
2710        let cfg_guard = match tier.target_arch {
2711            Some(arch) => quote! { #[cfg(target_arch = #arch)] },
2712            None => quote! {},
2713        };
2714
2715        // All variants are private implementation details of the dispatcher.
2716        // Suppress dead_code: if the dispatcher is unused, rustc warns on IT
2717        // (via quote_spanned! with the user's span). Warning on individual
2718        // variants would be confusing — the user didn't write _scalar or _v3.
2719        if tier.name != "scalar" {
2720            let arcane_attr = if let Some(ref self_type) = args.self_type {
2721                quote! { #[archmage::arcane(_self = #self_type)] }
2722            } else {
2723                quote! { #[archmage::arcane] }
2724            };
2725            variants.push(quote! {
2726                #cfg_guard
2727                #[allow(dead_code)]
2728                #arcane_attr
2729                #variant_fn
2730            });
2731        } else {
2732            variants.push(quote! {
2733                #cfg_guard
2734                #[allow(dead_code)]
2735                #variant_fn
2736            });
2737        }
2738    }
2739
2740    // =========================================================================
2741    // Generate dispatcher (adapted from gen_incant_entry)
2742    // =========================================================================
2743
2744    // Build dispatcher inputs: original params minus SimdToken
2745    let mut dispatcher_inputs: Vec<FnArg> = input_fn.sig.inputs.iter().cloned().collect();
2746    dispatcher_inputs.remove(token_param.index);
2747
2748    // Rename wildcard params so we can pass them as arguments
2749    let mut wild_counter = 0u32;
2750    for arg in &mut dispatcher_inputs {
2751        if let FnArg::Typed(pat_type) = arg
2752            && matches!(pat_type.pat.as_ref(), syn::Pat::Wild(_))
2753        {
2754            let ident = format_ident!("__autoversion_wild_{}", wild_counter);
2755            wild_counter += 1;
2756            *pat_type.pat = syn::Pat::Ident(syn::PatIdent {
2757                attrs: vec![],
2758                by_ref: None,
2759                mutability: None,
2760                ident,
2761                subpat: None,
2762            });
2763        }
2764    }
2765
2766    // Collect argument idents for dispatch calls (exclude self receiver)
2767    let dispatch_args: Vec<Ident> = dispatcher_inputs
2768        .iter()
2769        .filter_map(|arg| {
2770            if let FnArg::Typed(PatType { pat, .. }) = arg
2771                && let syn::Pat::Ident(pi) = pat.as_ref()
2772            {
2773                return Some(pi.ident.clone());
2774            }
2775            None
2776        })
2777        .collect();
2778
2779    // Build turbofish for forwarding type/const generics to variant calls
2780    let turbofish = build_turbofish(&input_fn.sig.generics);
2781
2782    // Group non-scalar tiers by target_arch for cfg blocks
2783    let mut arch_groups: Vec<(Option<&str>, Vec<&&TierDescriptor>)> = Vec::new();
2784    for tier in &tiers {
2785        if tier.name == "scalar" {
2786            continue;
2787        }
2788        if let Some(group) = arch_groups.iter_mut().find(|(a, _)| *a == tier.target_arch) {
2789            group.1.push(tier);
2790        } else {
2791            arch_groups.push((tier.target_arch, vec![tier]));
2792        }
2793    }
2794
2795    let mut dispatch_arms = Vec::new();
2796    for (target_arch, group_tiers) in &arch_groups {
2797        let mut tier_checks = Vec::new();
2798        for tier in group_tiers {
2799            let suffixed = format_ident!("{}_{}", fn_name, tier.suffix);
2800            let token_path: syn::Path = syn::parse_str(tier.token_path).unwrap();
2801
2802            let call = if has_self {
2803                quote! { self.#suffixed #turbofish(__t, #(#dispatch_args),*) }
2804            } else {
2805                quote! { #suffixed #turbofish(__t, #(#dispatch_args),*) }
2806            };
2807
2808            tier_checks.push(quote! {
2809                if let Some(__t) = #token_path::summon() {
2810                    break '__dispatch #call;
2811                }
2812            });
2813        }
2814
2815        let inner = quote! { #(#tier_checks)* };
2816
2817        if let Some(arch) = target_arch {
2818            dispatch_arms.push(quote! {
2819                #[cfg(target_arch = #arch)]
2820                { #inner }
2821            });
2822        } else {
2823            dispatch_arms.push(inner);
2824        }
2825    }
2826
2827    // Scalar fallback (always available, no summon needed)
2828    let scalar_name = format_ident!("{}_scalar", fn_name);
2829    let scalar_call = if has_self {
2830        quote! { self.#scalar_name #turbofish(archmage::ScalarToken, #(#dispatch_args),*) }
2831    } else {
2832        quote! { #scalar_name #turbofish(archmage::ScalarToken, #(#dispatch_args),*) }
2833    };
2834
2835    // Build dispatcher function
2836    let dispatcher_inputs_punct: syn::punctuated::Punctuated<FnArg, Token![,]> =
2837        dispatcher_inputs.into_iter().collect();
2838    let output = &input_fn.sig.output;
2839    let generics = &input_fn.sig.generics;
2840    let where_clause = &generics.where_clause;
2841
2842    // Use the user's span for the dispatcher so dead_code lint fires on the
2843    // function the user actually wrote, not on invisible generated variants.
2844    let user_span = fn_name.span();
2845    let dispatcher = quote_spanned! { user_span =>
2846        #(#fn_attrs)*
2847        #vis fn #fn_name #generics (#dispatcher_inputs_punct) #output #where_clause {
2848            '__dispatch: {
2849                use archmage::SimdToken;
2850                #(#dispatch_arms)*
2851                #scalar_call
2852            }
2853        }
2854    };
2855
2856    let expanded = quote! {
2857        #dispatcher
2858        #(#variants)*
2859    };
2860
2861    expanded.into()
2862}
2863
2864/// Let the compiler auto-vectorize scalar code for each architecture.
2865///
2866/// Write a plain scalar function with a `SimdToken` placeholder parameter.
2867/// `#[autoversion]` generates architecture-specific copies — each compiled
2868/// with different `#[target_feature]` flags via `#[arcane]` — plus a runtime
2869/// dispatcher that calls the best one the CPU supports.
2870///
2871/// You don't touch intrinsics, don't import SIMD types, don't think about
2872/// lane widths. The compiler's auto-vectorizer does the work; you give it
2873/// permission via `#[target_feature]`, which `#[autoversion]` handles.
2874///
2875/// # The simple win
2876///
2877/// ```rust,ignore
2878/// use archmage::SimdToken;
2879///
2880/// #[autoversion]
2881/// fn sum_of_squares(_token: SimdToken, data: &[f32]) -> f32 {
2882///     let mut sum = 0.0f32;
2883///     for &x in data {
2884///         sum += x * x;
2885///     }
2886///     sum
2887/// }
2888///
2889/// // Call directly — no token, no unsafe:
2890/// let result = sum_of_squares(&my_data);
2891/// ```
2892///
2893/// The `_token` parameter is never used in the body. It exists so the macro
2894/// knows where to substitute concrete token types. Each generated variant
2895/// gets `#[arcane]` → `#[target_feature(enable = "avx2,fma,...")]`, which
2896/// unlocks the compiler's auto-vectorizer for that feature set.
2897///
2898/// On x86-64 with the `_v3` variant (AVX2+FMA), that loop compiles to
2899/// `vfmadd231ps` — fused multiply-add on 8 floats per cycle. On aarch64
2900/// with NEON, you get `fmla`. The `_scalar` fallback compiles without any
2901/// SIMD target features, as a safety net for unknown hardware.
2902///
2903/// # Chunks + remainder
2904///
2905/// The classic data-processing pattern works naturally:
2906///
2907/// ```rust,ignore
2908/// #[autoversion]
2909/// fn normalize(_token: SimdToken, data: &mut [f32], scale: f32) {
2910///     // Compiler auto-vectorizes this — no manual SIMD needed.
2911///     // On v3, this becomes vdivps + vmulps on 8 floats at a time.
2912///     for x in data.iter_mut() {
2913///         *x = (*x - 128.0) * scale;
2914///     }
2915/// }
2916/// ```
2917///
2918/// If you want explicit control over chunk boundaries (e.g., for
2919/// accumulator patterns), that works too:
2920///
2921/// ```rust,ignore
2922/// #[autoversion]
2923/// fn dot_product(_token: SimdToken, a: &[f32], b: &[f32]) -> f32 {
2924///     let n = a.len().min(b.len());
2925///     let mut sum = 0.0f32;
2926///     for i in 0..n {
2927///         sum += a[i] * b[i];
2928///     }
2929///     sum
2930/// }
2931/// ```
2932///
2933/// The compiler decides the chunk size based on the target features of each
2934/// variant (8 floats for AVX2, 4 for NEON, 1 for scalar).
2935///
2936/// # What gets generated
2937///
2938/// With default tiers, `#[autoversion] fn process(_t: SimdToken, data: &[f32]) -> f32`
2939/// expands to:
2940///
2941/// - `process_v4(token: X64V4Token, ...)` — AVX-512 (behind `#[cfg(feature = "avx512")]`)
2942/// - `process_v3(token: X64V3Token, ...)` — AVX2+FMA
2943/// - `process_neon(token: NeonToken, ...)` — aarch64 NEON
2944/// - `process_wasm128(token: Wasm128Token, ...)` — WASM SIMD
2945/// - `process_scalar(token: ScalarToken, ...)` — no SIMD, always available
2946/// - `process(data: &[f32]) -> f32` — **dispatcher** (SimdToken param removed)
2947///
2948/// Each non-scalar variant is wrapped in `#[arcane]` (for `#[target_feature]`)
2949/// and `#[cfg(target_arch = ...)]`. The dispatcher does runtime CPU feature
2950/// detection via `Token::summon()` and calls the best match. When compiled
2951/// with `-C target-cpu=native`, the detection is elided by the compiler.
2952///
2953/// The suffixed variants are private sibling functions — only the dispatcher
2954/// is public. Within the same module, you can call them directly for testing
2955/// or benchmarking.
2956///
2957/// # SimdToken replacement
2958///
2959/// `#[autoversion]` replaces the `SimdToken` type annotation in the function
2960/// signature with the concrete token type for each variant (e.g.,
2961/// `archmage::X64V3Token`). Only the parameter's type changes — the function
2962/// body is never reparsed, which keeps compile times low.
2963///
2964/// The token variable (whatever you named it — `token`, `_token`, `_t`)
2965/// keeps working in the body because its type comes from the signature.
2966/// So `f32x8::from_array(token, ...)` works — `token` is now an `X64V3Token`
2967/// which satisfies the same trait bounds as `SimdToken`.
2968///
2969/// `#[magetypes]` takes a different approach: it replaces the text `Token`
2970/// everywhere in the function — signature and body — via string substitution.
2971/// Use `#[magetypes]` when you need body-level type substitution (e.g.,
2972/// `Token`-dependent constants or type aliases that differ per variant).
2973/// Use `#[autoversion]` when you want compiler auto-vectorization of scalar
2974/// code with zero boilerplate.
2975///
2976/// # Benchmarking
2977///
2978/// Measure the speedup with a side-by-side comparison. The generated
2979/// `_scalar` variant serves as the baseline; the dispatcher picks the
2980/// best available:
2981///
2982/// ```rust,ignore
2983/// use criterion::{Criterion, black_box, criterion_group, criterion_main};
2984/// use archmage::SimdToken;
2985///
2986/// #[autoversion]
2987/// fn sum_squares(_token: SimdToken, data: &[f32]) -> f32 {
2988///     data.iter().map(|&x| x * x).fold(0.0f32, |a, b| a + b)
2989/// }
2990///
2991/// fn bench(c: &mut Criterion) {
2992///     let data: Vec<f32> = (0..4096).map(|i| i as f32 * 0.01).collect();
2993///     let mut group = c.benchmark_group("sum_squares");
2994///
2995///     // Dispatched — picks best available at runtime
2996///     group.bench_function("dispatched", |b| {
2997///         b.iter(|| sum_squares(black_box(&data)))
2998///     });
2999///
3000///     // Scalar baseline — no target_feature, no auto-vectorization
3001///     group.bench_function("scalar", |b| {
3002///         b.iter(|| sum_squares_scalar(archmage::ScalarToken, black_box(&data)))
3003///     });
3004///
3005///     // Specific tier (useful for isolating which tier wins)
3006///     #[cfg(target_arch = "x86_64")]
3007///     if let Some(t) = archmage::X64V3Token::summon() {
3008///         group.bench_function("v3_avx2_fma", |b| {
3009///             b.iter(|| sum_squares_v3(t, black_box(&data)));
3010///         });
3011///     }
3012///
3013///     group.finish();
3014/// }
3015///
3016/// criterion_group!(benches, bench);
3017/// criterion_main!(benches);
3018/// ```
3019///
3020/// For a tight numeric loop on x86-64, the `_v3` variant (AVX2+FMA)
3021/// typically runs 4-8x faster than `_scalar` because `#[target_feature]`
3022/// unlocks auto-vectorization that the baseline build can't use.
3023///
3024/// # Explicit tiers
3025///
3026/// ```rust,ignore
3027/// #[autoversion(v3, v4, v4x, neon, arm_v2, wasm128)]
3028/// fn process(_token: SimdToken, data: &[f32]) -> f32 {
3029///     // ...
3030/// }
3031/// ```
3032///
3033/// `scalar` is always included implicitly.
3034///
3035/// Default tiers (when no list given): `v4`, `v3`, `neon`, `wasm128`, `scalar`.
3036///
3037/// Known tiers: `v1`, `v2`, `v3`, `v3_crypto`, `v4`, `v4x`, `neon`,
3038/// `neon_aes`, `neon_sha3`, `neon_crc`, `arm_v2`, `arm_v3`, `wasm128`,
3039/// `wasm128_relaxed`, `x64_crypto`, `scalar`.
3040///
3041/// # Methods with self receivers
3042///
3043/// For inherent methods, `self` works naturally — no `_self` needed:
3044///
3045/// ```rust,ignore
3046/// impl ImageBuffer {
3047///     #[autoversion]
3048///     fn normalize(&mut self, token: SimdToken, gamma: f32) {
3049///         for pixel in &mut self.data {
3050///             *pixel = (*pixel / 255.0).powf(gamma);
3051///         }
3052///     }
3053/// }
3054///
3055/// // Call normally — no token:
3056/// buffer.normalize(2.2);
3057/// ```
3058///
3059/// All receiver types work: `self`, `&self`, `&mut self`. Non-scalar variants
3060/// get `#[arcane]` (sibling mode), where `self`/`Self` resolve naturally.
3061///
3062/// # Trait methods (requires `_self = Type`)
3063///
3064/// Trait methods can't use `#[autoversion]` directly because proc macro
3065/// attributes on trait impl items can't expand to multiple sibling functions.
3066/// Use the delegation pattern with `_self = Type`:
3067///
3068/// ```rust,ignore
3069/// trait Processor {
3070///     fn process(&self, data: &[f32]) -> f32;
3071/// }
3072///
3073/// impl Processor for MyType {
3074///     fn process(&self, data: &[f32]) -> f32 {
3075///         self.process_impl(data) // delegate to autoversioned method
3076///     }
3077/// }
3078///
3079/// impl MyType {
3080///     #[autoversion(_self = MyType)]
3081///     fn process_impl(&self, token: SimdToken, data: &[f32]) -> f32 {
3082///         _self.weights.iter().zip(data).map(|(w, d)| w * d).sum()
3083///     }
3084/// }
3085/// ```
3086///
3087/// `_self = Type` uses nested mode in `#[arcane]`, which is required for
3088/// trait impls. Use `_self` (not `self`) in the body when using this form.
3089///
3090/// # Comparison with `#[magetypes]` + `incant!`
3091///
3092/// | | `#[autoversion]` | `#[magetypes]` + `incant!` |
3093/// |---|---|---|
3094/// | Placeholder | `SimdToken` | `Token` |
3095/// | Generates variants | Yes | Yes (magetypes) |
3096/// | Generates dispatcher | Yes | No (you write `incant!`) |
3097/// | Best for | Scalar auto-vectorization | Explicit SIMD with typed vectors |
3098/// | Lines of code | 1 attribute | 2+ (magetypes + incant + arcane) |
3099///
3100/// Use `#[autoversion]` for scalar loops you want auto-vectorized. Use
3101/// `#[magetypes]` + `incant!` when you need `f32x8`, `u8x32`, and
/// hand-tuned SIMD code per architecture.
3103#[proc_macro_attribute]
3104pub fn autoversion(attr: TokenStream, item: TokenStream) -> TokenStream {
3105    let args = parse_macro_input!(attr as AutoversionArgs);
3106    let input_fn = parse_macro_input!(item as LightFn);
3107    autoversion_impl(input_fn, args)
3108}
3109
3110// =============================================================================
3111// Unit tests for token/trait recognition maps
3112// =============================================================================
3113
3114#[cfg(test)]
3115mod tests {
3116    use super::*;
3117
3118    use super::generated::{ALL_CONCRETE_TOKENS, ALL_TRAIT_NAMES};
3119    use syn::{ItemFn, ReturnType};
3120
3121    #[test]
3122    fn every_concrete_token_is_in_token_to_features() {
3123        for &name in ALL_CONCRETE_TOKENS {
3124            assert!(
3125                token_to_features(name).is_some(),
3126                "Token `{}` exists in runtime crate but is NOT recognized by \
3127                 token_to_features() in the proc macro. Add it!",
3128                name
3129            );
3130        }
3131    }
3132
3133    #[test]
3134    fn every_trait_is_in_trait_to_features() {
3135        for &name in ALL_TRAIT_NAMES {
3136            assert!(
3137                trait_to_features(name).is_some(),
3138                "Trait `{}` exists in runtime crate but is NOT recognized by \
3139                 trait_to_features() in the proc macro. Add it!",
3140                name
3141            );
3142        }
3143    }
3144
3145    #[test]
3146    fn token_aliases_map_to_same_features() {
3147        // Desktop64 = X64V3Token
3148        assert_eq!(
3149            token_to_features("Desktop64"),
3150            token_to_features("X64V3Token"),
3151            "Desktop64 and X64V3Token should map to identical features"
3152        );
3153
3154        // Server64 = X64V4Token = Avx512Token
3155        assert_eq!(
3156            token_to_features("Server64"),
3157            token_to_features("X64V4Token"),
3158            "Server64 and X64V4Token should map to identical features"
3159        );
3160        assert_eq!(
3161            token_to_features("X64V4Token"),
3162            token_to_features("Avx512Token"),
3163            "X64V4Token and Avx512Token should map to identical features"
3164        );
3165
3166        // Arm64 = NeonToken
3167        assert_eq!(
3168            token_to_features("Arm64"),
3169            token_to_features("NeonToken"),
3170            "Arm64 and NeonToken should map to identical features"
3171        );
3172    }
3173
3174    #[test]
3175    fn trait_to_features_includes_tokens_as_bounds() {
3176        // Tier tokens should also work as trait bounds
3177        // (for `impl X64V3Token` patterns, even though Rust won't allow it,
3178        // the macro processes AST before type checking)
3179        let tier_tokens = [
3180            "X64V2Token",
3181            "X64CryptoToken",
3182            "X64V3Token",
3183            "Desktop64",
3184            "Avx2FmaToken",
3185            "X64V4Token",
3186            "Avx512Token",
3187            "Server64",
3188            "X64V4xToken",
3189            "Avx512Fp16Token",
3190            "NeonToken",
3191            "Arm64",
3192            "NeonAesToken",
3193            "NeonSha3Token",
3194            "NeonCrcToken",
3195            "Arm64V2Token",
3196            "Arm64V3Token",
3197        ];
3198
3199        for &name in &tier_tokens {
3200            assert!(
3201                trait_to_features(name).is_some(),
3202                "Tier token `{}` should also be recognized in trait_to_features() \
3203                 for use as a generic bound. Add it!",
3204                name
3205            );
3206        }
3207    }
3208
3209    #[test]
3210    fn trait_features_are_cumulative() {
3211        // HasX64V4 should include all HasX64V2 features plus more
3212        let v2_features = trait_to_features("HasX64V2").unwrap();
3213        let v4_features = trait_to_features("HasX64V4").unwrap();
3214
3215        for &f in v2_features {
3216            assert!(
3217                v4_features.contains(&f),
3218                "HasX64V4 should include v2 feature `{}` but doesn't",
3219                f
3220            );
3221        }
3222
3223        // v4 should have more features than v2
3224        assert!(
3225            v4_features.len() > v2_features.len(),
3226            "HasX64V4 should have more features than HasX64V2"
3227        );
3228    }
3229
3230    #[test]
3231    fn x64v3_trait_features_include_v2() {
3232        // X64V3Token as trait bound should include v2 features
3233        let v2 = trait_to_features("HasX64V2").unwrap();
3234        let v3 = trait_to_features("X64V3Token").unwrap();
3235
3236        for &f in v2 {
3237            assert!(
3238                v3.contains(&f),
3239                "X64V3Token trait features should include v2 feature `{}` but don't",
3240                f
3241            );
3242        }
3243    }
3244
3245    #[test]
3246    fn has_neon_aes_includes_neon() {
3247        let neon = trait_to_features("HasNeon").unwrap();
3248        let neon_aes = trait_to_features("HasNeonAes").unwrap();
3249
3250        for &f in neon {
3251            assert!(
3252                neon_aes.contains(&f),
3253                "HasNeonAes should include NEON feature `{}`",
3254                f
3255            );
3256        }
3257    }
3258
3259    #[test]
3260    fn no_removed_traits_are_recognized() {
3261        // These traits were removed in 0.3.0 and should NOT be recognized
3262        let removed = [
3263            "HasSse",
3264            "HasSse2",
3265            "HasSse41",
3266            "HasSse42",
3267            "HasAvx",
3268            "HasAvx2",
3269            "HasFma",
3270            "HasAvx512f",
3271            "HasAvx512bw",
3272            "HasAvx512vl",
3273            "HasAvx512vbmi2",
3274            "HasSve",
3275            "HasSve2",
3276        ];
3277
3278        for &name in &removed {
3279            assert!(
3280                trait_to_features(name).is_none(),
3281                "Removed trait `{}` should NOT be in trait_to_features(). \
3282                 It was removed in 0.3.0 — users should migrate to tier traits.",
3283                name
3284            );
3285        }
3286    }
3287
3288    #[test]
3289    fn no_nonexistent_tokens_are_recognized() {
3290        // These tokens don't exist and should NOT be recognized
3291        let fake = [
3292            "SveToken",
3293            "Sve2Token",
3294            "Avx512VnniToken",
3295            "X64V4ModernToken",
3296            "NeonFp16Token",
3297        ];
3298
3299        for &name in &fake {
3300            assert!(
3301                token_to_features(name).is_none(),
3302                "Non-existent token `{}` should NOT be in token_to_features()",
3303                name
3304            );
3305        }
3306    }
3307
3308    #[test]
3309    fn featureless_traits_are_not_in_registries() {
3310        // SimdToken and IntoConcreteToken should NOT be in any feature registry
3311        // because they don't map to CPU features
3312        for &name in FEATURELESS_TRAIT_NAMES {
3313            assert!(
3314                token_to_features(name).is_none(),
3315                "`{}` should NOT be in token_to_features() — it has no CPU features",
3316                name
3317            );
3318            assert!(
3319                trait_to_features(name).is_none(),
3320                "`{}` should NOT be in trait_to_features() — it has no CPU features",
3321                name
3322            );
3323        }
3324    }
3325
3326    #[test]
3327    fn find_featureless_trait_detects_simdtoken() {
3328        let names = vec!["SimdToken".to_string()];
3329        assert_eq!(find_featureless_trait(&names), Some("SimdToken"));
3330
3331        let names = vec!["IntoConcreteToken".to_string()];
3332        assert_eq!(find_featureless_trait(&names), Some("IntoConcreteToken"));
3333
3334        // Feature-bearing traits should NOT be detected
3335        let names = vec!["HasX64V2".to_string()];
3336        assert_eq!(find_featureless_trait(&names), None);
3337
3338        let names = vec!["HasNeon".to_string()];
3339        assert_eq!(find_featureless_trait(&names), None);
3340
3341        // Mixed: if SimdToken is among real traits, still detected
3342        let names = vec!["SimdToken".to_string(), "HasX64V2".to_string()];
3343        assert_eq!(find_featureless_trait(&names), Some("SimdToken"));
3344    }
3345
3346    #[test]
3347    fn arm64_v2_v3_traits_are_cumulative() {
3348        let v2_features = trait_to_features("HasArm64V2").unwrap();
3349        let v3_features = trait_to_features("HasArm64V3").unwrap();
3350
3351        for &f in v2_features {
3352            assert!(
3353                v3_features.contains(&f),
3354                "HasArm64V3 should include v2 feature `{}` but doesn't",
3355                f
3356            );
3357        }
3358
3359        assert!(
3360            v3_features.len() > v2_features.len(),
3361            "HasArm64V3 should have more features than HasArm64V2"
3362        );
3363    }
3364
3365    // =========================================================================
3366    // autoversion — argument parsing
3367    // =========================================================================
3368
3369    #[test]
3370    fn autoversion_args_empty() {
3371        let args: AutoversionArgs = syn::parse_str("").unwrap();
3372        assert!(args.self_type.is_none());
3373        assert!(args.tiers.is_none());
3374    }
3375
3376    #[test]
3377    fn autoversion_args_single_tier() {
3378        let args: AutoversionArgs = syn::parse_str("v3").unwrap();
3379        assert!(args.self_type.is_none());
3380        assert_eq!(args.tiers.as_ref().unwrap(), &["v3"]);
3381    }
3382
3383    #[test]
3384    fn autoversion_args_tiers_only() {
3385        let args: AutoversionArgs = syn::parse_str("v3, v4, neon").unwrap();
3386        assert!(args.self_type.is_none());
3387        let tiers = args.tiers.unwrap();
3388        assert_eq!(tiers, vec!["v3", "v4", "neon"]);
3389    }
3390
3391    #[test]
3392    fn autoversion_args_many_tiers() {
3393        let args: AutoversionArgs =
3394            syn::parse_str("v1, v2, v3, v4, v4x, neon, arm_v2, wasm128").unwrap();
3395        assert_eq!(
3396            args.tiers.unwrap(),
3397            vec!["v1", "v2", "v3", "v4", "v4x", "neon", "arm_v2", "wasm128"]
3398        );
3399    }
3400
3401    #[test]
3402    fn autoversion_args_trailing_comma() {
3403        let args: AutoversionArgs = syn::parse_str("v3, v4,").unwrap();
3404        assert_eq!(args.tiers.as_ref().unwrap(), &["v3", "v4"]);
3405    }
3406
3407    #[test]
3408    fn autoversion_args_self_only() {
3409        let args: AutoversionArgs = syn::parse_str("_self = MyType").unwrap();
3410        assert!(args.self_type.is_some());
3411        assert!(args.tiers.is_none());
3412    }
3413
3414    #[test]
3415    fn autoversion_args_self_and_tiers() {
3416        let args: AutoversionArgs = syn::parse_str("_self = MyType, v3, neon").unwrap();
3417        assert!(args.self_type.is_some());
3418        let tiers = args.tiers.unwrap();
3419        assert_eq!(tiers, vec!["v3", "neon"]);
3420    }
3421
3422    #[test]
3423    fn autoversion_args_tiers_then_self() {
3424        // _self can appear after tier names
3425        let args: AutoversionArgs = syn::parse_str("v3, neon, _self = MyType").unwrap();
3426        assert!(args.self_type.is_some());
3427        let tiers = args.tiers.unwrap();
3428        assert_eq!(tiers, vec!["v3", "neon"]);
3429    }
3430
3431    #[test]
3432    fn autoversion_args_self_with_path_type() {
3433        let args: AutoversionArgs = syn::parse_str("_self = crate::MyType").unwrap();
3434        assert!(args.self_type.is_some());
3435        assert!(args.tiers.is_none());
3436    }
3437
3438    #[test]
3439    fn autoversion_args_self_with_generic_type() {
3440        let args: AutoversionArgs = syn::parse_str("_self = Vec<u8>").unwrap();
3441        assert!(args.self_type.is_some());
3442        let ty_str = args.self_type.unwrap().to_token_stream().to_string();
3443        assert!(ty_str.contains("Vec"), "Expected Vec<u8>, got: {}", ty_str);
3444    }
3445
3446    #[test]
3447    fn autoversion_args_self_trailing_comma() {
3448        let args: AutoversionArgs = syn::parse_str("_self = MyType,").unwrap();
3449        assert!(args.self_type.is_some());
3450        assert!(args.tiers.is_none());
3451    }
3452
3453    // =========================================================================
3454    // autoversion — find_simd_token_param
3455    // =========================================================================
3456
3457    #[test]
3458    fn find_simd_token_param_first_position() {
3459        let f: ItemFn =
3460            syn::parse_str("fn process(token: SimdToken, data: &[f32]) -> f32 {}").unwrap();
3461        let param = find_simd_token_param(&f.sig).unwrap();
3462        assert_eq!(param.index, 0);
3463        assert_eq!(param.ident, "token");
3464    }
3465
3466    #[test]
3467    fn find_simd_token_param_second_position() {
3468        let f: ItemFn =
3469            syn::parse_str("fn process(data: &[f32], token: SimdToken) -> f32 {}").unwrap();
3470        let param = find_simd_token_param(&f.sig).unwrap();
3471        assert_eq!(param.index, 1);
3472        assert_eq!(param.ident, "token");
3473    }
3474
3475    #[test]
3476    fn find_simd_token_param_underscore_prefix() {
3477        let f: ItemFn =
3478            syn::parse_str("fn process(_token: SimdToken, data: &[f32]) -> f32 {}").unwrap();
3479        let param = find_simd_token_param(&f.sig).unwrap();
3480        assert_eq!(param.index, 0);
3481        assert_eq!(param.ident, "_token");
3482    }
3483
3484    #[test]
3485    fn find_simd_token_param_wildcard() {
3486        let f: ItemFn = syn::parse_str("fn process(_: SimdToken, data: &[f32]) -> f32 {}").unwrap();
3487        let param = find_simd_token_param(&f.sig).unwrap();
3488        assert_eq!(param.index, 0);
3489        assert_eq!(param.ident, "__autoversion_token");
3490    }
3491
3492    #[test]
3493    fn find_simd_token_param_not_found() {
3494        let f: ItemFn = syn::parse_str("fn process(data: &[f32]) -> f32 {}").unwrap();
3495        assert!(find_simd_token_param(&f.sig).is_none());
3496    }
3497
3498    #[test]
3499    fn find_simd_token_param_no_params() {
3500        let f: ItemFn = syn::parse_str("fn process() {}").unwrap();
3501        assert!(find_simd_token_param(&f.sig).is_none());
3502    }
3503
3504    #[test]
3505    fn find_simd_token_param_concrete_token_not_matched() {
3506        // autoversion looks specifically for SimdToken, not concrete tokens
3507        let f: ItemFn =
3508            syn::parse_str("fn process(token: X64V3Token, data: &[f32]) -> f32 {}").unwrap();
3509        assert!(find_simd_token_param(&f.sig).is_none());
3510    }
3511
3512    #[test]
3513    fn find_simd_token_param_scalar_token_not_matched() {
3514        let f: ItemFn =
3515            syn::parse_str("fn process(token: ScalarToken, data: &[f32]) -> f32 {}").unwrap();
3516        assert!(find_simd_token_param(&f.sig).is_none());
3517    }
3518
3519    #[test]
3520    fn find_simd_token_param_among_many() {
3521        let f: ItemFn = syn::parse_str(
3522            "fn process(a: i32, b: f64, token: SimdToken, c: &str, d: bool) -> f32 {}",
3523        )
3524        .unwrap();
3525        let param = find_simd_token_param(&f.sig).unwrap();
3526        assert_eq!(param.index, 2);
3527        assert_eq!(param.ident, "token");
3528    }
3529
3530    #[test]
3531    fn find_simd_token_param_with_generics() {
3532        let f: ItemFn =
3533            syn::parse_str("fn process<T: Clone>(token: SimdToken, data: &[T]) -> T {}").unwrap();
3534        let param = find_simd_token_param(&f.sig).unwrap();
3535        assert_eq!(param.index, 0);
3536        assert_eq!(param.ident, "token");
3537    }
3538
3539    #[test]
3540    fn find_simd_token_param_with_where_clause() {
3541        let f: ItemFn = syn::parse_str(
3542            "fn process<T>(token: SimdToken, data: &[T]) -> T where T: Copy + Default {}",
3543        )
3544        .unwrap();
3545        let param = find_simd_token_param(&f.sig).unwrap();
3546        assert_eq!(param.index, 0);
3547    }
3548
3549    #[test]
3550    fn find_simd_token_param_with_lifetime() {
3551        let f: ItemFn =
3552            syn::parse_str("fn process<'a>(token: SimdToken, data: &'a [f32]) -> &'a f32 {}")
3553                .unwrap();
3554        let param = find_simd_token_param(&f.sig).unwrap();
3555        assert_eq!(param.index, 0);
3556    }
3557
3558    // =========================================================================
3559    // autoversion — tier resolution
3560    // =========================================================================
3561
3562    #[test]
3563    fn autoversion_default_tiers_all_resolve() {
3564        let names: Vec<String> = DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect();
3565        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site()).unwrap();
3566        assert!(!tiers.is_empty());
3567        // scalar should be present
3568        assert!(tiers.iter().any(|t| t.name == "scalar"));
3569    }
3570
3571    #[test]
3572    fn autoversion_scalar_always_appended() {
3573        let names = vec!["v3".to_string(), "neon".to_string()];
3574        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site()).unwrap();
3575        assert!(
3576            tiers.iter().any(|t| t.name == "scalar"),
3577            "scalar must be auto-appended"
3578        );
3579    }
3580
3581    #[test]
3582    fn autoversion_scalar_not_duplicated() {
3583        let names = vec!["v3".to_string(), "scalar".to_string()];
3584        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site()).unwrap();
3585        let scalar_count = tiers.iter().filter(|t| t.name == "scalar").count();
3586        assert_eq!(scalar_count, 1, "scalar must not be duplicated");
3587    }
3588
3589    #[test]
3590    fn autoversion_tiers_sorted_by_priority() {
3591        let names = vec!["neon".to_string(), "v4".to_string(), "v3".to_string()];
3592        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site()).unwrap();
3593        // v4 (priority 40) > v3 (30) > neon (20) > scalar (0)
3594        let priorities: Vec<u32> = tiers.iter().map(|t| t.priority).collect();
3595        for window in priorities.windows(2) {
3596            assert!(
3597                window[0] >= window[1],
3598                "Tiers not sorted by priority: {:?}",
3599                priorities
3600            );
3601        }
3602    }
3603
3604    #[test]
3605    fn autoversion_unknown_tier_errors() {
3606        let names = vec!["v3".to_string(), "avx9000".to_string()];
3607        let result = resolve_tiers(&names, proc_macro2::Span::call_site());
3608        match result {
3609            Ok(_) => panic!("Expected error for unknown tier 'avx9000'"),
3610            Err(e) => {
3611                let err_msg = e.to_string();
3612                assert!(
3613                    err_msg.contains("avx9000"),
3614                    "Error should mention unknown tier: {}",
3615                    err_msg
3616                );
3617            }
3618        }
3619    }
3620
3621    #[test]
3622    fn autoversion_all_known_tiers_resolve() {
3623        // Every tier in ALL_TIERS should be findable
3624        for tier in ALL_TIERS {
3625            assert!(
3626                find_tier(tier.name).is_some(),
3627                "Tier '{}' should be findable by name",
3628                tier.name
3629            );
3630        }
3631    }
3632
3633    #[test]
3634    fn autoversion_default_tier_list_is_sensible() {
3635        // Defaults should cover x86, ARM, WASM, and scalar
3636        let names: Vec<String> = DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect();
3637        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site()).unwrap();
3638
3639        let has_x86 = tiers.iter().any(|t| t.target_arch == Some("x86_64"));
3640        let has_arm = tiers.iter().any(|t| t.target_arch == Some("aarch64"));
3641        let has_wasm = tiers.iter().any(|t| t.target_arch == Some("wasm32"));
3642        let has_scalar = tiers.iter().any(|t| t.name == "scalar");
3643
3644        assert!(has_x86, "Default tiers should include an x86_64 tier");
3645        assert!(has_arm, "Default tiers should include an aarch64 tier");
3646        assert!(has_wasm, "Default tiers should include a wasm32 tier");
3647        assert!(has_scalar, "Default tiers should include scalar");
3648    }
3649
3650    // =========================================================================
3651    // autoversion — variant replacement (AST manipulation)
3652    // =========================================================================
3653
3654    /// Mirrors what `autoversion_impl` does for a single variant: parse an
3655    /// ItemFn (for test convenience), rename it, swap the SimdToken param
3656    /// type, optionally inject the `_self` preamble for scalar+self.
3657    fn do_variant_replacement(func: &str, tier_name: &str, has_self: bool) -> ItemFn {
3658        let mut f: ItemFn = syn::parse_str(func).unwrap();
3659        let fn_name = f.sig.ident.to_string();
3660
3661        let tier = find_tier(tier_name).unwrap();
3662
3663        // Rename
3664        f.sig.ident = format_ident!("{}_{}", fn_name, tier.suffix);
3665
3666        // Find and replace SimdToken param type
3667        let token_idx = find_simd_token_param(&f.sig)
3668            .unwrap_or_else(|| panic!("No SimdToken param in: {}", func))
3669            .index;
3670        let concrete_type: Type = syn::parse_str(tier.token_path).unwrap();
3671        if let FnArg::Typed(pt) = &mut f.sig.inputs[token_idx] {
3672            *pt.ty = concrete_type;
3673        }
3674
3675        // Scalar + self: inject preamble
3676        if tier_name == "scalar" && has_self {
3677            let preamble: syn::Stmt = syn::parse_quote!(let _self = self;);
3678            f.block.stmts.insert(0, preamble);
3679        }
3680
3681        f
3682    }
3683
3684    #[test]
3685    fn variant_replacement_v3_renames_function() {
3686        let f = do_variant_replacement(
3687            "fn process(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3688            "v3",
3689            false,
3690        );
3691        assert_eq!(f.sig.ident, "process_v3");
3692    }
3693
3694    #[test]
3695    fn variant_replacement_v3_replaces_token_type() {
3696        let f = do_variant_replacement(
3697            "fn process(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3698            "v3",
3699            false,
3700        );
3701        let first_param_ty = match &f.sig.inputs[0] {
3702            FnArg::Typed(pt) => pt.ty.to_token_stream().to_string(),
3703            _ => panic!("Expected typed param"),
3704        };
3705        assert!(
3706            first_param_ty.contains("X64V3Token"),
3707            "Expected X64V3Token, got: {}",
3708            first_param_ty
3709        );
3710    }
3711
3712    #[test]
3713    fn variant_replacement_neon_produces_valid_fn() {
3714        let f = do_variant_replacement(
3715            "fn compute(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3716            "neon",
3717            false,
3718        );
3719        assert_eq!(f.sig.ident, "compute_neon");
3720        let first_param_ty = match &f.sig.inputs[0] {
3721            FnArg::Typed(pt) => pt.ty.to_token_stream().to_string(),
3722            _ => panic!("Expected typed param"),
3723        };
3724        assert!(
3725            first_param_ty.contains("NeonToken"),
3726            "Expected NeonToken, got: {}",
3727            first_param_ty
3728        );
3729    }
3730
3731    #[test]
3732    fn variant_replacement_wasm128_produces_valid_fn() {
3733        let f = do_variant_replacement(
3734            "fn compute(_t: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3735            "wasm128",
3736            false,
3737        );
3738        assert_eq!(f.sig.ident, "compute_wasm128");
3739    }
3740
3741    #[test]
3742    fn variant_replacement_scalar_produces_valid_fn() {
3743        let f = do_variant_replacement(
3744            "fn compute(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3745            "scalar",
3746            false,
3747        );
3748        assert_eq!(f.sig.ident, "compute_scalar");
3749        let first_param_ty = match &f.sig.inputs[0] {
3750            FnArg::Typed(pt) => pt.ty.to_token_stream().to_string(),
3751            _ => panic!("Expected typed param"),
3752        };
3753        assert!(
3754            first_param_ty.contains("ScalarToken"),
3755            "Expected ScalarToken, got: {}",
3756            first_param_ty
3757        );
3758    }
3759
3760    #[test]
3761    fn variant_replacement_v4_produces_valid_fn() {
3762        let f = do_variant_replacement(
3763            "fn transform(token: SimdToken, data: &mut [f32]) { }",
3764            "v4",
3765            false,
3766        );
3767        assert_eq!(f.sig.ident, "transform_v4");
3768        let first_param_ty = match &f.sig.inputs[0] {
3769            FnArg::Typed(pt) => pt.ty.to_token_stream().to_string(),
3770            _ => panic!("Expected typed param"),
3771        };
3772        assert!(
3773            first_param_ty.contains("X64V4Token"),
3774            "Expected X64V4Token, got: {}",
3775            first_param_ty
3776        );
3777    }
3778
3779    #[test]
3780    fn variant_replacement_v4x_produces_valid_fn() {
3781        let f = do_variant_replacement(
3782            "fn transform(token: SimdToken, data: &mut [f32]) { }",
3783            "v4x",
3784            false,
3785        );
3786        assert_eq!(f.sig.ident, "transform_v4x");
3787    }
3788
3789    #[test]
3790    fn variant_replacement_arm_v2_produces_valid_fn() {
3791        let f = do_variant_replacement(
3792            "fn transform(token: SimdToken, data: &mut [f32]) { }",
3793            "arm_v2",
3794            false,
3795        );
3796        assert_eq!(f.sig.ident, "transform_arm_v2");
3797    }
3798
3799    #[test]
3800    fn variant_replacement_preserves_generics() {
3801        let f = do_variant_replacement(
3802            "fn process<T: Copy + Default>(token: SimdToken, data: &[T]) -> T { T::default() }",
3803            "v3",
3804            false,
3805        );
3806        assert_eq!(f.sig.ident, "process_v3");
3807        // Generic params should still be present
3808        assert!(
3809            !f.sig.generics.params.is_empty(),
3810            "Generics should be preserved"
3811        );
3812    }
3813
3814    #[test]
3815    fn variant_replacement_preserves_where_clause() {
3816        let f = do_variant_replacement(
3817            "fn process<T>(token: SimdToken, data: &[T]) -> T where T: Copy + Default { T::default() }",
3818            "v3",
3819            false,
3820        );
3821        assert!(
3822            f.sig.generics.where_clause.is_some(),
3823            "Where clause should be preserved"
3824        );
3825    }
3826
3827    #[test]
3828    fn variant_replacement_preserves_return_type() {
3829        let f = do_variant_replacement(
3830            "fn process(token: SimdToken, data: &[f32]) -> Vec<f32> { vec![] }",
3831            "neon",
3832            false,
3833        );
3834        let ret = f.sig.output.to_token_stream().to_string();
3835        assert!(
3836            ret.contains("Vec"),
3837            "Return type should be preserved, got: {}",
3838            ret
3839        );
3840    }
3841
3842    #[test]
3843    fn variant_replacement_preserves_multiple_params() {
3844        let f = do_variant_replacement(
3845            "fn process(token: SimdToken, a: &[f32], b: &[f32], scale: f32) -> f32 { 0.0 }",
3846            "v3",
3847            false,
3848        );
3849        // SimdToken → X64V3Token, plus the 3 other params
3850        assert_eq!(f.sig.inputs.len(), 4);
3851    }
3852
3853    #[test]
3854    fn variant_replacement_preserves_no_return_type() {
3855        let f = do_variant_replacement(
3856            "fn transform(token: SimdToken, data: &mut [f32]) { }",
3857            "v3",
3858            false,
3859        );
3860        assert!(
3861            matches!(f.sig.output, ReturnType::Default),
3862            "No return type should remain as Default"
3863        );
3864    }
3865
3866    #[test]
3867    fn variant_replacement_preserves_lifetime_params() {
3868        let f = do_variant_replacement(
3869            "fn process<'a>(token: SimdToken, data: &'a [f32]) -> &'a [f32] { data }",
3870            "v3",
3871            false,
3872        );
3873        assert!(!f.sig.generics.params.is_empty());
3874    }
3875
3876    #[test]
3877    fn variant_replacement_scalar_self_injects_preamble() {
3878        let f = do_variant_replacement(
3879            "fn method(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3880            "scalar",
3881            true, // has_self
3882        );
3883        assert_eq!(f.sig.ident, "method_scalar");
3884
3885        // First statement should be `let _self = self;`
3886        let body_str = f.block.to_token_stream().to_string();
3887        assert!(
3888            body_str.contains("let _self = self"),
3889            "Scalar+self variant should have _self preamble, got: {}",
3890            body_str
3891        );
3892    }
3893
3894    #[test]
3895    fn variant_replacement_all_default_tiers_produce_valid_fns() {
3896        let names: Vec<String> = DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect();
3897        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site()).unwrap();
3898
3899        for tier in &tiers {
3900            let f = do_variant_replacement(
3901                "fn process(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3902                tier.name,
3903                false,
3904            );
3905            let expected_name = format!("process_{}", tier.suffix);
3906            assert_eq!(
3907                f.sig.ident.to_string(),
3908                expected_name,
3909                "Tier '{}' should produce function '{}'",
3910                tier.name,
3911                expected_name
3912            );
3913        }
3914    }
3915
3916    #[test]
3917    fn variant_replacement_all_known_tiers_produce_valid_fns() {
3918        for tier in ALL_TIERS {
3919            let f = do_variant_replacement(
3920                "fn compute(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3921                tier.name,
3922                false,
3923            );
3924            let expected_name = format!("compute_{}", tier.suffix);
3925            assert_eq!(
3926                f.sig.ident.to_string(),
3927                expected_name,
3928                "Tier '{}' should produce function '{}'",
3929                tier.name,
3930                expected_name
3931            );
3932        }
3933    }
3934
3935    #[test]
3936    fn variant_replacement_no_simdtoken_remains() {
3937        for tier in ALL_TIERS {
3938            let f = do_variant_replacement(
3939                "fn compute(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3940                tier.name,
3941                false,
3942            );
3943            let full_str = f.to_token_stream().to_string();
3944            assert!(
3945                !full_str.contains("SimdToken"),
3946                "Tier '{}' variant still contains 'SimdToken': {}",
3947                tier.name,
3948                full_str
3949            );
3950        }
3951    }
3952
3953    // =========================================================================
3954    // autoversion — cfg guard and tier descriptor properties
3955    // =========================================================================
3956
3957    #[test]
3958    fn tier_v3_targets_x86_64() {
3959        let tier = find_tier("v3").unwrap();
3960        assert_eq!(tier.target_arch, Some("x86_64"));
3961    }
3962
3963    #[test]
3964    fn tier_v4_targets_x86_64() {
3965        let tier = find_tier("v4").unwrap();
3966        assert_eq!(tier.target_arch, Some("x86_64"));
3967    }
3968
3969    #[test]
3970    fn tier_v4x_targets_x86_64() {
3971        let tier = find_tier("v4x").unwrap();
3972        assert_eq!(tier.target_arch, Some("x86_64"));
3973    }
3974
3975    #[test]
3976    fn tier_neon_targets_aarch64() {
3977        let tier = find_tier("neon").unwrap();
3978        assert_eq!(tier.target_arch, Some("aarch64"));
3979    }
3980
3981    #[test]
3982    fn tier_wasm128_targets_wasm32() {
3983        let tier = find_tier("wasm128").unwrap();
3984        assert_eq!(tier.target_arch, Some("wasm32"));
3985    }
3986
3987    #[test]
3988    fn tier_scalar_has_no_guards() {
3989        let tier = find_tier("scalar").unwrap();
3990        assert_eq!(tier.target_arch, None);
3991        assert_eq!(tier.priority, 0);
3992    }
3993
3994    #[test]
3995    fn tier_priorities_are_consistent() {
3996        // Higher-capability tiers within the same arch should have higher priority
3997        let v2 = find_tier("v2").unwrap();
3998        let v3 = find_tier("v3").unwrap();
3999        let v4 = find_tier("v4").unwrap();
4000        assert!(v4.priority > v3.priority);
4001        assert!(v3.priority > v2.priority);
4002
4003        let neon = find_tier("neon").unwrap();
4004        let arm_v2 = find_tier("arm_v2").unwrap();
4005        let arm_v3 = find_tier("arm_v3").unwrap();
4006        assert!(arm_v3.priority > arm_v2.priority);
4007        assert!(arm_v2.priority > neon.priority);
4008
4009        // scalar is lowest
4010        let scalar = find_tier("scalar").unwrap();
4011        assert!(neon.priority > scalar.priority);
4012        assert!(v2.priority > scalar.priority);
4013    }
4014
4015    // =========================================================================
4016    // autoversion — dispatcher structure
4017    // =========================================================================
4018
4019    #[test]
4020    fn dispatcher_param_removal_free_fn() {
4021        // Simulate what autoversion_impl does: remove the SimdToken param
4022        let f: ItemFn =
4023            syn::parse_str("fn process(token: SimdToken, data: &[f32], scale: f32) -> f32 { 0.0 }")
4024                .unwrap();
4025
4026        let token_param = find_simd_token_param(&f.sig).unwrap();
4027        let mut dispatcher_inputs: Vec<FnArg> = f.sig.inputs.iter().cloned().collect();
4028        dispatcher_inputs.remove(token_param.index);
4029
4030        // Should have 2 params remaining: data, scale
4031        assert_eq!(dispatcher_inputs.len(), 2);
4032
4033        // Neither should be SimdToken
4034        for arg in &dispatcher_inputs {
4035            if let FnArg::Typed(pt) = arg {
4036                let ty_str = pt.ty.to_token_stream().to_string();
4037                assert!(
4038                    !ty_str.contains("SimdToken"),
4039                    "SimdToken should be removed from dispatcher, found: {}",
4040                    ty_str
4041                );
4042            }
4043        }
4044    }
4045
4046    #[test]
4047    fn dispatcher_param_removal_token_only() {
4048        let f: ItemFn = syn::parse_str("fn process(token: SimdToken) -> f32 { 0.0 }").unwrap();
4049
4050        let token_param = find_simd_token_param(&f.sig).unwrap();
4051        let mut dispatcher_inputs: Vec<FnArg> = f.sig.inputs.iter().cloned().collect();
4052        dispatcher_inputs.remove(token_param.index);
4053
4054        // No params left — dispatcher takes no arguments
4055        assert_eq!(dispatcher_inputs.len(), 0);
4056    }
4057
4058    #[test]
4059    fn dispatcher_param_removal_token_last() {
4060        let f: ItemFn =
4061            syn::parse_str("fn process(data: &[f32], scale: f32, token: SimdToken) -> f32 { 0.0 }")
4062                .unwrap();
4063
4064        let token_param = find_simd_token_param(&f.sig).unwrap();
4065        assert_eq!(token_param.index, 2);
4066
4067        let mut dispatcher_inputs: Vec<FnArg> = f.sig.inputs.iter().cloned().collect();
4068        dispatcher_inputs.remove(token_param.index);
4069
4070        assert_eq!(dispatcher_inputs.len(), 2);
4071    }
4072
4073    #[test]
4074    fn dispatcher_dispatch_args_extraction() {
4075        // Test that we correctly extract idents for the dispatch call
4076        let f: ItemFn =
4077            syn::parse_str("fn process(data: &[f32], scale: f32) -> f32 { 0.0 }").unwrap();
4078
4079        let dispatch_args: Vec<String> = f
4080            .sig
4081            .inputs
4082            .iter()
4083            .filter_map(|arg| {
4084                if let FnArg::Typed(PatType { pat, .. }) = arg {
4085                    if let syn::Pat::Ident(pi) = pat.as_ref() {
4086                        return Some(pi.ident.to_string());
4087                    }
4088                }
4089                None
4090            })
4091            .collect();
4092
4093        assert_eq!(dispatch_args, vec!["data", "scale"]);
4094    }
4095
4096    #[test]
4097    fn dispatcher_wildcard_params_get_renamed() {
4098        let f: ItemFn = syn::parse_str("fn process(_: &[f32], _: f32) -> f32 { 0.0 }").unwrap();
4099
4100        let mut dispatcher_inputs: Vec<FnArg> = f.sig.inputs.iter().cloned().collect();
4101
4102        let mut wild_counter = 0u32;
4103        for arg in &mut dispatcher_inputs {
4104            if let FnArg::Typed(pat_type) = arg {
4105                if matches!(pat_type.pat.as_ref(), syn::Pat::Wild(_)) {
4106                    let ident = format_ident!("__autoversion_wild_{}", wild_counter);
4107                    wild_counter += 1;
4108                    *pat_type.pat = syn::Pat::Ident(syn::PatIdent {
4109                        attrs: vec![],
4110                        by_ref: None,
4111                        mutability: None,
4112                        ident,
4113                        subpat: None,
4114                    });
4115                }
4116            }
4117        }
4118
4119        // Both wildcards should be renamed
4120        assert_eq!(wild_counter, 2);
4121
4122        let names: Vec<String> = dispatcher_inputs
4123            .iter()
4124            .filter_map(|arg| {
4125                if let FnArg::Typed(PatType { pat, .. }) = arg {
4126                    if let syn::Pat::Ident(pi) = pat.as_ref() {
4127                        return Some(pi.ident.to_string());
4128                    }
4129                }
4130                None
4131            })
4132            .collect();
4133
4134        assert_eq!(names, vec!["__autoversion_wild_0", "__autoversion_wild_1"]);
4135    }
4136
4137    // =========================================================================
4138    // autoversion — suffix_path (reused in dispatch)
4139    // =========================================================================
4140
4141    #[test]
4142    fn suffix_path_simple() {
4143        let path: syn::Path = syn::parse_str("process").unwrap();
4144        let suffixed = suffix_path(&path, "v3");
4145        assert_eq!(suffixed.to_token_stream().to_string(), "process_v3");
4146    }
4147
4148    #[test]
4149    fn suffix_path_qualified() {
4150        let path: syn::Path = syn::parse_str("module::process").unwrap();
4151        let suffixed = suffix_path(&path, "neon");
4152        let s = suffixed.to_token_stream().to_string();
4153        assert!(
4154            s.contains("process_neon"),
4155            "Expected process_neon, got: {}",
4156            s
4157        );
4158    }
4159}