Skip to main content

archmage_macros/
lib.rs

1//! Proc-macros for archmage SIMD capability tokens.
2//!
//! Provides the `#[arcane]` attribute (and its alias) to make raw intrinsics
4//! safe via token proof.
5
6use proc_macro::TokenStream;
7use quote::{ToTokens, format_ident, quote, quote_spanned};
8use syn::{
9    Attribute, FnArg, GenericParam, Ident, PatType, Signature, Token, Type, TypeParamBound,
10    parse::{Parse, ParseStream},
11    parse_macro_input, parse_quote, token,
12};
13
/// A function parsed with the body left as an opaque TokenStream.
///
/// Only the signature is fully parsed into an AST — the body tokens are collected
/// without building any AST nodes (no expressions, statements, or patterns parsed).
/// This saves ~2ms per function invocation at 100 lines of code.
#[derive(Clone)]
struct LightFn {
    // Outer attributes (`#[...]`) preceding the function.
    attrs: Vec<Attribute>,
    // Visibility qualifier (`pub`, `pub(crate)`, or inherited).
    vis: syn::Visibility,
    // Fully parsed signature: name, generics, inputs, output, where clause.
    sig: Signature,
    // The brace pair delimiting the body; kept so spans survive re-emission.
    brace_token: token::Brace,
    // Raw body tokens, never parsed into statements/expressions.
    body: proc_macro2::TokenStream,
}
27
28impl Parse for LightFn {
29    fn parse(input: ParseStream) -> syn::Result<Self> {
30        let attrs = input.call(Attribute::parse_outer)?;
31        let vis: syn::Visibility = input.parse()?;
32        let sig: Signature = input.parse()?;
33        let content;
34        let brace_token = syn::braced!(content in input);
35        let body: proc_macro2::TokenStream = content.parse()?;
36        Ok(LightFn {
37            attrs,
38            vis,
39            sig,
40            brace_token,
41            body,
42        })
43    }
44}
45
46impl ToTokens for LightFn {
47    fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
48        for attr in &self.attrs {
49            attr.to_tokens(tokens);
50        }
51        self.vis.to_tokens(tokens);
52        self.sig.to_tokens(tokens);
53        self.brace_token.surround(tokens, |tokens| {
54            self.body.to_tokens(tokens);
55        });
56    }
57}
58
59/// Filter out `#[inline]`, `#[inline(always)]`, `#[inline(never)]` from attributes.
60///
61/// Used to prevent duplicate inline attributes when the macro adds its own.
62/// Duplicate `#[inline]` is a warning that will become a hard error.
63fn filter_inline_attrs(attrs: &[Attribute]) -> Vec<&Attribute> {
64    attrs
65        .iter()
66        .filter(|attr| !attr.path().is_ident("inline"))
67        .collect()
68}
69
70/// Check if an attribute is a lint-control attribute.
71///
72/// Lint-control attributes (`#[allow(...)]`, `#[expect(...)]`, `#[deny(...)]`,
73/// `#[warn(...)]`, `#[forbid(...)]`) must be propagated to generated sibling
74/// functions so that user-applied lint suppressions work on the generated code.
75fn is_lint_attr(attr: &Attribute) -> bool {
76    let path = attr.path();
77    path.is_ident("allow")
78        || path.is_ident("expect")
79        || path.is_ident("deny")
80        || path.is_ident("warn")
81        || path.is_ident("forbid")
82}
83
84/// Extract lint-control attributes from a list of attributes.
85///
86/// Returns references to `#[allow(...)]`, `#[expect(...)]`, `#[deny(...)]`,
87/// `#[warn(...)]`, and `#[forbid(...)]` attributes. These need to be propagated
88/// to generated sibling functions so clippy/rustc lint suppressions work.
89fn filter_lint_attrs(attrs: &[Attribute]) -> Vec<&Attribute> {
90    attrs.iter().filter(|attr| is_lint_attr(attr)).collect()
91}
92
93/// Build a turbofish token stream from a function's generics.
94///
95/// Collects type and const generic parameters (skipping lifetimes) and returns
96/// a `::<A, B, N, M>` turbofish fragment. Returns empty tokens if there are no
97/// type/const generics to forward.
98///
99/// This is needed when the dispatcher or wrapper calls variant/sibling functions
100/// that have const generics not inferable from argument types alone.
101fn build_turbofish(generics: &syn::Generics) -> proc_macro2::TokenStream {
102    let params: Vec<proc_macro2::TokenStream> = generics
103        .params
104        .iter()
105        .filter_map(|param| match param {
106            GenericParam::Type(tp) => {
107                let ident = &tp.ident;
108                Some(quote! { #ident })
109            }
110            GenericParam::Const(cp) => {
111                let ident = &cp.ident;
112                Some(quote! { #ident })
113            }
114            GenericParam::Lifetime(_) => None,
115        })
116        .collect();
117    if params.is_empty() {
118        quote! {}
119    } else {
120        quote! { ::<#(#params),*> }
121    }
122}
123
124/// Replace all `Self` identifier tokens with a concrete type in a token stream.
125///
126/// Recurses into groups (braces, parens, brackets). Used for `#[arcane(_self = Type)]`
127/// to replace `Self` in both the return type and body without needing to parse the body.
128fn replace_self_in_tokens(
129    tokens: proc_macro2::TokenStream,
130    replacement: &Type,
131) -> proc_macro2::TokenStream {
132    let mut result = proc_macro2::TokenStream::new();
133    for tt in tokens {
134        match tt {
135            proc_macro2::TokenTree::Ident(ref ident) if ident == "Self" => {
136                result.extend(replacement.to_token_stream());
137            }
138            proc_macro2::TokenTree::Group(group) => {
139                let new_stream = replace_self_in_tokens(group.stream(), replacement);
140                let mut new_group = proc_macro2::Group::new(group.delimiter(), new_stream);
141                new_group.set_span(group.span());
142                result.extend(std::iter::once(proc_macro2::TokenTree::Group(new_group)));
143            }
144            other => {
145                result.extend(std::iter::once(other));
146            }
147        }
148    }
149    result
150}
151
/// Arguments to the `#[arcane]` macro.
///
/// Parsed from the attribute's argument list, e.g.
/// `#[arcane(nested, _self = MyType, import_intrinsics)]`.
#[derive(Default)]
struct ArcaneArgs {
    /// Use `#[inline(always)]` instead of `#[inline]` for the inner function.
    /// Requires nightly Rust with `#![feature(target_feature_inline_always)]`.
    inline_always: bool,
    /// The concrete type to use for `self` receiver.
    /// When specified, `self`/`&self`/`&mut self` is transformed to `_self: Type`/`&Type`/`&mut Type`.
    /// Implies `nested = true`.
    self_type: Option<Type>,
    /// Generate an `unreachable!()` stub on the wrong architecture.
    /// Default is false (cfg-out: no function emitted on wrong arch).
    stub: bool,
    /// Use nested inner function instead of sibling function.
    /// Implied by `_self = Type`. Required for associated functions in impl blocks
    /// that have no `self` receiver (the macro can't distinguish them from free functions).
    nested: bool,
    /// Inject `use archmage::intrinsics::{arch}::*;` (includes safe memory ops).
    import_intrinsics: bool,
    /// Inject `use magetypes::simd::{ns}::*;`, `use magetypes::simd::generic::*;`,
    /// and `use magetypes::simd::backends::*;`.
    import_magetypes: bool,
}
175
176impl Parse for ArcaneArgs {
177    fn parse(input: ParseStream) -> syn::Result<Self> {
178        let mut args = ArcaneArgs::default();
179
180        while !input.is_empty() {
181            let ident: Ident = input.parse()?;
182            match ident.to_string().as_str() {
183                "inline_always" => args.inline_always = true,
184                "stub" => args.stub = true,
185                "nested" => args.nested = true,
186                "import_intrinsics" => args.import_intrinsics = true,
187                "import_magetypes" => args.import_magetypes = true,
188                "_self" => {
189                    let _: Token![=] = input.parse()?;
190                    args.self_type = Some(input.parse()?);
191                }
192                other => {
193                    return Err(syn::Error::new(
194                        ident.span(),
195                        format!("unknown arcane argument: `{}`", other),
196                    ));
197                }
198            }
199            // Consume optional comma
200            if input.peek(Token![,]) {
201                let _: Token![,] = input.parse()?;
202            }
203        }
204
205        // _self = Type implies nested (inner fn needed for Self replacement)
206        if args.self_type.is_some() {
207            args.nested = true;
208        }
209
210        Ok(args)
211    }
212}
213
214// Token-to-features and trait-to-features mappings are generated from
215// token-registry.toml by xtask. Regenerate with: cargo run -p xtask -- generate
216mod generated;
217use generated::{
218    canonical_token_to_tier_suffix, tier_to_canonical_token, token_to_arch, token_to_features,
219    token_to_magetypes_namespace, trait_to_arch, trait_to_features, trait_to_magetypes_namespace,
220};
221
/// Result of extracting token info from a type.
///
/// Produced by `extract_token_type_info`; the three variants drive how
/// features/arch are resolved (registry lookup, trait bounds, or generics).
enum TokenTypeInfo {
    /// Concrete token type (e.g., `Avx2Token`)
    Concrete(String),
    /// impl Trait with the trait names (e.g., `impl HasX64V2`)
    ImplTrait(Vec<String>),
    /// Generic type parameter name (e.g., `T`)
    Generic(String),
}
231
232/// Extract token type information from a type.
233fn extract_token_type_info(ty: &Type) -> Option<TokenTypeInfo> {
234    match ty {
235        Type::Path(type_path) => {
236            // Get the last segment of the path (e.g., "Avx2Token" from "archmage::Avx2Token")
237            type_path.path.segments.last().map(|seg| {
238                let name = seg.ident.to_string();
239                // Check if it's a known concrete token type
240                if token_to_features(&name).is_some() {
241                    TokenTypeInfo::Concrete(name)
242                } else {
243                    // Might be a generic type parameter like `T`
244                    TokenTypeInfo::Generic(name)
245                }
246            })
247        }
248        Type::Reference(type_ref) => {
249            // Handle &Token or &mut Token
250            extract_token_type_info(&type_ref.elem)
251        }
252        Type::ImplTrait(impl_trait) => {
253            // Handle `impl HasX64V2` or `impl HasX64V2 + HasNeon`
254            let traits: Vec<String> = extract_trait_names_from_bounds(&impl_trait.bounds);
255            if traits.is_empty() {
256                None
257            } else {
258                Some(TokenTypeInfo::ImplTrait(traits))
259            }
260        }
261        _ => None,
262    }
263}
264
265/// Extract trait names from type param bounds.
266fn extract_trait_names_from_bounds(
267    bounds: &syn::punctuated::Punctuated<TypeParamBound, Token![+]>,
268) -> Vec<String> {
269    bounds
270        .iter()
271        .filter_map(|bound| {
272            if let TypeParamBound::Trait(trait_bound) = bound {
273                trait_bound
274                    .path
275                    .segments
276                    .last()
277                    .map(|seg| seg.ident.to_string())
278            } else {
279                None
280            }
281        })
282        .collect()
283}
284
285/// Look up a generic type parameter in the function's generics.
286fn find_generic_bounds(sig: &Signature, type_name: &str) -> Option<Vec<String>> {
287    // Check inline bounds first (e.g., `fn foo<T: HasX64V2>(token: T)`)
288    for param in &sig.generics.params {
289        if let GenericParam::Type(type_param) = param
290            && type_param.ident == type_name
291        {
292            let traits = extract_trait_names_from_bounds(&type_param.bounds);
293            if !traits.is_empty() {
294                return Some(traits);
295            }
296        }
297    }
298
299    // Check where clause (e.g., `fn foo<T>(token: T) where T: HasX64V2`)
300    if let Some(where_clause) = &sig.generics.where_clause {
301        for predicate in &where_clause.predicates {
302            if let syn::WherePredicate::Type(pred_type) = predicate
303                && let Type::Path(type_path) = &pred_type.bounded_ty
304                && let Some(seg) = type_path.path.segments.last()
305                && seg.ident == type_name
306            {
307                let traits = extract_trait_names_from_bounds(&pred_type.bounds);
308                if !traits.is_empty() {
309                    return Some(traits);
310                }
311            }
312        }
313    }
314
315    None
316}
317
318/// Convert trait names to features, collecting all features from all traits.
319fn traits_to_features(trait_names: &[String]) -> Option<Vec<&'static str>> {
320    let mut all_features = Vec::new();
321
322    for trait_name in trait_names {
323        if let Some(features) = trait_to_features(trait_name) {
324            for &feature in features {
325                if !all_features.contains(&feature) {
326                    all_features.push(feature);
327                }
328            }
329        }
330    }
331
332    if all_features.is_empty() {
333        None
334    } else {
335        Some(all_features)
336    }
337}
338
/// Trait names that don't map to any CPU features. These are valid in the type
/// system but cannot be used as token bounds in `#[arcane]`/`#[rite]` because
/// the macros need concrete features to generate `#[target_feature]` attributes.
/// Used by `find_featureless_trait` to produce a targeted error message.
const FEATURELESS_TRAIT_NAMES: &[&str] = &["SimdToken", "IntoConcreteToken"];
343
344/// Check if any trait names are featureless (no CPU feature mapping).
345/// Returns the first featureless trait name found.
346fn find_featureless_trait(trait_names: &[String]) -> Option<&'static str> {
347    for name in trait_names {
348        for &featureless in FEATURELESS_TRAIT_NAMES {
349            if name == featureless {
350                return Some(featureless);
351            }
352        }
353    }
354    None
355}
356
357/// Diagnose why `find_token_param` failed. Returns the name of a featureless
358/// trait if the signature has a parameter bounded by one (e.g., `SimdToken`).
359fn diagnose_featureless_token(sig: &Signature) -> Option<&'static str> {
360    for arg in &sig.inputs {
361        if let FnArg::Typed(PatType { ty, .. }) = arg
362            && let Some(info) = extract_token_type_info(ty)
363        {
364            match &info {
365                TokenTypeInfo::ImplTrait(names) => {
366                    if let Some(name) = find_featureless_trait(names) {
367                        return Some(name);
368                    }
369                }
370                TokenTypeInfo::Generic(type_name) => {
371                    // Check if the type name itself is a featureless trait
372                    // (e.g., `token: SimdToken` used as a bare path)
373                    let as_vec = vec![type_name.clone()];
374                    if let Some(name) = find_featureless_trait(&as_vec) {
375                        return Some(name);
376                    }
377                    // Check generic bounds (e.g., `T: SimdToken`)
378                    if let Some(bounds) = find_generic_bounds(sig, type_name)
379                        && let Some(name) = find_featureless_trait(&bounds)
380                    {
381                        return Some(name);
382                    }
383                }
384                TokenTypeInfo::Concrete(_) => {}
385            }
386        }
387    }
388    None
389}
390
/// Result of finding a token parameter in a function signature.
///
/// Built by `find_token_param`; the optional fields are `None` when the token
/// was specified via traits/generics rather than a concrete token type.
struct TokenParamInfo {
    /// The parameter identifier (e.g., `token`)
    ident: Ident,
    /// Target features to enable (e.g., `["avx2", "fma"]`)
    features: Vec<&'static str>,
    /// Target architecture (Some for concrete tokens, None for traits/generics)
    target_arch: Option<&'static str>,
    /// Concrete token type name (Some for concrete tokens, None for traits/generics)
    token_type_name: Option<String>,
    /// Magetypes width namespace (e.g., "v3", "neon", "wasm128")
    magetypes_namespace: Option<&'static str>,
}
404
405/// Resolve magetypes namespace from a list of trait names.
406/// Returns the first matching namespace found.
407fn traits_to_magetypes_namespace(trait_names: &[String]) -> Option<&'static str> {
408    for name in trait_names {
409        if let Some(ns) = trait_to_magetypes_namespace(name) {
410            return Some(ns);
411        }
412    }
413    None
414}
415
416/// Given trait bound names, return the first matching target architecture.
417fn traits_to_arch(trait_names: &[String]) -> Option<&'static str> {
418    for name in trait_names {
419        if let Some(arch) = trait_to_arch(name) {
420            return Some(arch);
421        }
422    }
423    None
424}
425
/// Find the first token parameter in a function signature.
///
/// Scans parameters left to right and returns info for the first one whose
/// type resolves to a non-empty feature set (concrete token, `impl Trait`
/// bound, or bounded generic). Parameters that don't resolve — including
/// unbounded generics — are skipped, not treated as errors; `None` means no
/// usable token parameter was found anywhere in the signature.
fn find_token_param(sig: &Signature) -> Option<TokenParamInfo> {
    for arg in &sig.inputs {
        match arg {
            FnArg::Receiver(_) => {
                // Self receivers (self, &self, &mut self) are not yet supported.
                // The macro creates an inner function, and Rust's inner functions
                // cannot have `self` parameters. Supporting this would require
                // AST rewriting to replace `self` with a regular parameter.
                // See the module docs for the workaround.
                continue;
            }
            FnArg::Typed(PatType { pat, ty, .. }) => {
                if let Some(info) = extract_token_type_info(ty) {
                    // Resolve (features, arch, concrete-name, magetypes ns)
                    // according to how the token was written.
                    let (features, arch, token_name, mage_ns) = match info {
                        TokenTypeInfo::Concrete(ref name) => {
                            let features = token_to_features(name).map(|f| f.to_vec());
                            let arch = token_to_arch(name);
                            let ns = token_to_magetypes_namespace(name);
                            (features, arch, Some(name.clone()), ns)
                        }
                        TokenTypeInfo::ImplTrait(ref trait_names) => {
                            let ns = traits_to_magetypes_namespace(trait_names);
                            let arch = traits_to_arch(trait_names);
                            (traits_to_features(trait_names), arch, None, ns)
                        }
                        TokenTypeInfo::Generic(type_name) => {
                            // Look up the generic parameter's bounds
                            let bounds = find_generic_bounds(sig, &type_name);
                            let features = bounds.as_ref().and_then(|t| traits_to_features(t));
                            let ns = bounds
                                .as_ref()
                                .and_then(|t| traits_to_magetypes_namespace(t));
                            let arch = bounds.as_ref().and_then(|t| traits_to_arch(t));
                            (features, arch, None, ns)
                        }
                    };

                    if let Some(features) = features {
                        // Extract parameter name (or synthesize one for wildcard `_`)
                        let ident = match pat.as_ref() {
                            syn::Pat::Ident(pat_ident) => Some(pat_ident.ident.clone()),
                            syn::Pat::Wild(w) => {
                                Some(Ident::new("__archmage_token", w.underscore_token.span))
                            }
                            // Destructuring patterns: no single name to forward; skip.
                            _ => None,
                        };
                        if let Some(ident) = ident {
                            return Some(TokenParamInfo {
                                ident,
                                features,
                                target_arch: arch,
                                token_type_name: token_name,
                                magetypes_namespace: mage_ns,
                            });
                        }
                    }
                }
            }
        }
    }
    None
}
489
/// Represents the kind of self receiver and the transformed parameter.
///
/// NOTE(review): no uses are visible in this chunk — presumably consumed by
/// the `_self = Type` receiver rewriting further down the file; confirm there.
enum SelfReceiver {
    /// `self` (by value/move)
    Owned,
    /// `&self` (shared reference)
    Ref,
    /// `&mut self` (mutable reference)
    RefMut,
}
499
500/// Generate import statements to prepend to a function body.
501///
502/// Returns a `TokenStream` of `use` statements based on the import flags,
503/// target architecture, and magetypes namespace.
504fn generate_imports(
505    target_arch: Option<&str>,
506    magetypes_namespace: Option<&str>,
507    import_intrinsics: bool,
508    import_magetypes: bool,
509) -> proc_macro2::TokenStream {
510    let mut imports = proc_macro2::TokenStream::new();
511
512    if import_intrinsics && let Some(arch) = target_arch {
513        let arch_ident = format_ident!("{}", arch);
514        imports.extend(quote! {
515            #[allow(unused_imports)]
516            use archmage::intrinsics::#arch_ident::*;
517        });
518        // ScalarToken or unknown arch: import_intrinsics is a no-op
519    }
520
521    if import_magetypes && let Some(ns) = magetypes_namespace {
522        let ns_ident = format_ident!("{}", ns);
523        imports.extend(quote! {
524            #[allow(unused_imports)]
525            use magetypes::simd::#ns_ident::*;
526            #[allow(unused_imports)]
527            use magetypes::simd::backends::*;
528        });
529    }
530
531    imports
532}
533
/// Shared implementation for the `#[arcane]` macro and its alias.
///
/// Validates the signature, resolves the token parameter to CPU features,
/// optionally injects imports, then dispatches to one of three expansion
/// strategies: wasm-safe (no unsafe wrapper), nested inner fn, or sibling fn.
/// All error paths return a `compile_error!` token stream.
fn arcane_impl(mut input_fn: LightFn, macro_name: &str, args: ArcaneArgs) -> TokenStream {
    // Check for self receiver
    let has_self_receiver = input_fn
        .sig
        .inputs
        .first()
        .map(|arg| matches!(arg, FnArg::Receiver(_)))
        .unwrap_or(false);

    // Nested mode is required when _self = Type is used (for Self replacement in nested fn).
    // In sibling mode, self/Self work naturally since both fns live in the same impl scope.
    // However, if there's a self receiver in nested mode, we still need _self = Type.
    if has_self_receiver && args.nested && args.self_type.is_none() {
        let msg = format!(
            "{} with self receiver in nested mode requires `_self = Type` argument.\n\
             Example: #[{}(nested, _self = MyType)]\n\
             Use `_self` (not `self`) in the function body to refer to self.\n\
             \n\
             Alternatively, remove `nested` to use sibling expansion (default), \
             which handles self/Self naturally.",
            macro_name, macro_name
        );
        return syn::Error::new_spanned(&input_fn.sig, msg)
            .to_compile_error()
            .into();
    }

    // Find the token parameter, its features, target arch, and token type name
    let TokenParamInfo {
        ident: _token_ident,
        features,
        target_arch,
        token_type_name,
        magetypes_namespace,
    } = match find_token_param(&input_fn.sig) {
        Some(result) => result,
        None => {
            // Check for specific misuse: featureless traits like SimdToken
            if let Some(trait_name) = diagnose_featureless_token(&input_fn.sig) {
                let msg = format!(
                    "`{trait_name}` cannot be used as a token bound in #[{macro_name}] \
                     because it doesn't specify any CPU features.\n\
                     \n\
                     #[{macro_name}] needs concrete features to generate #[target_feature]. \
                     Use a concrete token or a feature trait:\n\
                     \n\
                     Concrete tokens: X64V3Token, Desktop64, NeonToken, Arm64V2Token, ...\n\
                     Feature traits:  impl HasX64V2, impl HasNeon, impl HasArm64V3, ..."
                );
                return syn::Error::new_spanned(&input_fn.sig, msg)
                    .to_compile_error()
                    .into();
            }
            // Generic fallback error: no recognizable token parameter at all.
            let msg = format!(
                "{} requires a token parameter. Supported forms:\n\
                 - Concrete: `token: X64V3Token`\n\
                 - impl Trait: `token: impl HasX64V2`\n\
                 - Generic: `fn foo<T: HasX64V2>(token: T, ...)`\n\
                 - With self: `#[{}(_self = Type)] fn method(&self, token: impl HasNeon, ...)`",
                macro_name, macro_name
            );
            return syn::Error::new_spanned(&input_fn.sig, msg)
                .to_compile_error()
                .into();
        }
    };

    // Check: import_intrinsics with AVX-512 features requires the avx512 cargo feature
    // on archmage (propagated to archmage-macros). Without it, 512-bit safe memory ops
    // from safe_unaligned_simd are not available, and _mm512_loadu_ps etc. would resolve
    // to the unsafe core::arch versions (taking raw pointers instead of references).
    //
    // We check the resolved features (not the token name) so this works uniformly for
    // concrete tokens (X64V4Token), trait bounds (impl HasX64V4), and generics (T: HasX64V4).
    #[cfg(not(feature = "avx512"))]
    if args.import_intrinsics && features.iter().any(|f| f.starts_with("avx512")) {
        let token_desc = token_type_name.as_deref().unwrap_or("an AVX-512 token");
        let msg = format!(
            "Using {token_desc} with `import_intrinsics` requires the `avx512` feature.\n\
             \n\
             Add to your Cargo.toml:\n\
             \x20 archmage = {{ version = \"...\", features = [\"avx512\"] }}\n\
             \n\
             Without it, 512-bit safe memory ops (_mm512_loadu_ps etc.) are not available.\n\
             If you only need value intrinsics (no memory ops), remove `import_intrinsics`."
        );
        return syn::Error::new_spanned(&input_fn.sig, msg)
            .to_compile_error()
            .into();
    }

    // Prepend import statements to body if requested
    let body_imports = generate_imports(
        target_arch,
        magetypes_namespace,
        args.import_intrinsics,
        args.import_magetypes,
    );
    if !body_imports.is_empty() {
        let original_body = &input_fn.body;
        input_fn.body = quote! {
            #body_imports
            #original_body
        };
    }

    // Build target_feature attributes
    let target_feature_attrs: Vec<Attribute> = features
        .iter()
        .map(|feature| parse_quote!(#[target_feature(enable = #feature)]))
        .collect();

    // Rename wildcard patterns (`_: Type`) to named params so the inner/sibling call works
    let mut wild_rename_counter = 0u32;
    for arg in &mut input_fn.sig.inputs {
        if let FnArg::Typed(pat_type) = arg
            && matches!(pat_type.pat.as_ref(), syn::Pat::Wild(_))
        {
            let ident = format_ident!("__archmage_wild_{}", wild_rename_counter);
            wild_rename_counter += 1;
            *pat_type.pat = syn::Pat::Ident(syn::PatIdent {
                attrs: vec![],
                by_ref: None,
                mutability: None,
                ident,
                subpat: None,
            });
        }
    }

    // Choose inline attribute based on args
    let inline_attr: Attribute = if args.inline_always {
        parse_quote!(#[inline(always)])
    } else {
        parse_quote!(#[inline])
    };

    // On wasm32, #[target_feature(enable = "simd128")] functions are safe (Rust 1.54+).
    // The wasm validation model guarantees unsupported instructions trap deterministically,
    // so there's no UB from feature mismatch. Skip the unsafe wrapper entirely.
    if target_arch == Some("wasm32") {
        return arcane_impl_wasm_safe(
            input_fn,
            &args,
            token_type_name,
            target_feature_attrs,
            inline_attr,
        );
    }

    // Non-wasm: emit either a nested inner fn or a sibling fn + safe wrapper.
    if args.nested {
        arcane_impl_nested(
            input_fn,
            &args,
            target_arch,
            token_type_name,
            target_feature_attrs,
            inline_attr,
        )
    } else {
        arcane_impl_sibling(
            input_fn,
            &args,
            target_arch,
            token_type_name,
            target_feature_attrs,
            inline_attr,
        )
    }
}
705
/// WASM-safe expansion: emits rite-style output (no unsafe wrapper).
///
/// On wasm32, `#[target_feature(enable = "simd128")]` is safe — the wasm validation
/// model traps deterministically on unsupported instructions, so there's no UB.
/// We emit the function directly with `#[target_feature]` + `#[inline]`, like `#[rite]`.
///
/// If `_self = Type` is set, we inject `let _self = self;` at the top of the body
/// (the function stays in impl scope, so `Self` resolves naturally — no replacement needed).
///
/// With `stub`, a second `#[cfg(not(target_arch = "wasm32"))]` function that
/// panics via `unreachable!` is emitted for other architectures.
fn arcane_impl_wasm_safe(
    input_fn: LightFn,
    args: &ArcaneArgs,
    token_type_name: Option<String>,
    target_feature_attrs: Vec<Attribute>,
    inline_attr: Attribute,
) -> TokenStream {
    let vis = &input_fn.vis;
    let sig = &input_fn.sig;
    let fn_name = &sig.ident;
    let attrs = &input_fn.attrs;

    // Fallback name only affects the stub's panic message text.
    let token_type_str = token_type_name.as_deref().unwrap_or("UnknownToken");

    // If _self = Type is set, inject `let _self = self;` at top of body so user code
    // referencing `_self` works. The function remains in impl scope, so `Self` resolves
    // naturally — no Self replacement needed (unlike nested mode's inner fn).
    let body = if args.self_type.is_some() {
        let original_body = &input_fn.body;
        quote! {
            let _self = self;
            #original_body
        }
    } else {
        input_fn.body.clone()
    };

    // Prepend target_feature + inline attrs, filtering user #[inline] to avoid duplicates
    let mut new_attrs = target_feature_attrs;
    new_attrs.push(inline_attr);
    for attr in filter_inline_attrs(attrs) {
        new_attrs.push(attr.clone());
    }

    let stub = if args.stub {
        // Build stub args for suppressing unused-variable warnings
        let stub_args: Vec<proc_macro2::TokenStream> = sig
            .inputs
            .iter()
            .filter_map(|arg| match arg {
                FnArg::Typed(pat_type) => {
                    if let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref() {
                        let ident = &pat_ident.ident;
                        Some(quote!(#ident))
                    } else {
                        None
                    }
                }
                FnArg::Receiver(_) => None,
            })
            .collect();

        // The stub is diagnostic-only: reaching it means a token existed on an
        // arch where summon() should have returned None.
        quote! {
            #[cfg(not(target_arch = "wasm32"))]
            #vis #sig {
                let _ = (#(#stub_args),*);
                unreachable!(
                    "BUG: {}() was called but requires {} (target_arch = \"wasm32\"). \
                     {}::summon() returns None on this architecture, so this function \
                     is unreachable in safe code. If you used forge_token_dangerously(), \
                     that is the bug.",
                    stringify!(#fn_name),
                    #token_type_str,
                    #token_type_str,
                )
            }
        }
    } else {
        quote! {}
    };

    let expanded = quote! {
        #[cfg(target_arch = "wasm32")]
        #(#new_attrs)*
        #vis #sig {
            #body
        }

        #stub
    };

    expanded.into()
}
797
798/// Sibling expansion (default): generates two functions at the same scope level.
799///
800/// ```ignore
801/// // #[arcane] fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { body }
802/// // expands to:
803/// #[cfg(target_arch = "x86_64")]
804/// #[doc(hidden)]
805/// #[target_feature(enable = "avx2,fma,...")]
806/// #[inline]
807/// fn __arcane_process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { body }
808///
809/// #[cfg(target_arch = "x86_64")]
810/// fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] {
811///     unsafe { __arcane_process(token, data) }
812/// }
813/// ```
814///
815/// The sibling function is safe (Rust 2024 edition allows safe `#[target_feature]`
816/// functions). Only the call from the wrapper needs `unsafe` because the wrapper
817/// lacks matching target features. Compatible with `#![forbid(unsafe_code)]`.
818///
819/// Self/self work naturally since both functions live in the same impl scope.
820fn arcane_impl_sibling(
821    input_fn: LightFn,
822    args: &ArcaneArgs,
823    target_arch: Option<&str>,
824    token_type_name: Option<String>,
825    target_feature_attrs: Vec<Attribute>,
826    inline_attr: Attribute,
827) -> TokenStream {
828    let vis = &input_fn.vis;
829    let sig = &input_fn.sig;
830    let fn_name = &sig.ident;
831    let generics = &sig.generics;
832    let where_clause = &generics.where_clause;
833    let inputs = &sig.inputs;
834    let output = &sig.output;
835    let body = &input_fn.body;
836    // Filter out user #[inline] attrs to avoid duplicates (will become a hard error).
837    // The wrapper gets #[inline(always)] unconditionally — it's a trivial unsafe { sibling() }.
838    let attrs = filter_inline_attrs(&input_fn.attrs);
839    // Lint-control attrs (#[allow(...)], #[expect(...)], etc.) must also go on the sibling,
840    // because the sibling has the same parameters and clippy lints it independently.
841    let lint_attrs = filter_lint_attrs(&input_fn.attrs);
842
843    let sibling_name = format_ident!("__arcane_{}", fn_name);
844
845    // Detect self receiver
846    let has_self_receiver = inputs
847        .first()
848        .map(|arg| matches!(arg, FnArg::Receiver(_)))
849        .unwrap_or(false);
850
851    // Build sibling signature: same as original but with sibling name, #[doc(hidden)]
852    // NOT unsafe — Rust 2024 edition allows safe #[target_feature] functions.
853    // Only the call from non-matching context (the wrapper) needs unsafe.
854    let sibling_sig_inputs = inputs;
855
856    // Build turbofish for forwarding type/const generic params to sibling
857    let turbofish = build_turbofish(generics);
858
859    // Build the call from wrapper to sibling
860    let sibling_call = if has_self_receiver {
861        // Method: self.__arcane_fn::<T, N>(other_args...)
862        let other_args: Vec<proc_macro2::TokenStream> = inputs
863            .iter()
864            .skip(1) // skip self receiver
865            .filter_map(|arg| {
866                if let FnArg::Typed(pat_type) = arg
867                    && let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref()
868                {
869                    let ident = &pat_ident.ident;
870                    Some(quote!(#ident))
871                } else {
872                    None
873                }
874            })
875            .collect();
876        quote! { self.#sibling_name #turbofish(#(#other_args),*) }
877    } else {
878        // Free function: __arcane_fn::<T, N>(all_args...)
879        let all_args: Vec<proc_macro2::TokenStream> = inputs
880            .iter()
881            .filter_map(|arg| {
882                if let FnArg::Typed(pat_type) = arg
883                    && let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref()
884                {
885                    let ident = &pat_ident.ident;
886                    Some(quote!(#ident))
887                } else {
888                    None
889                }
890            })
891            .collect();
892        quote! { #sibling_name #turbofish(#(#all_args),*) }
893    };
894
895    // Build stub args for suppressing unused warnings
896    let stub_args: Vec<proc_macro2::TokenStream> = inputs
897        .iter()
898        .filter_map(|arg| match arg {
899            FnArg::Typed(pat_type) => {
900                if let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref() {
901                    let ident = &pat_ident.ident;
902                    Some(quote!(#ident))
903                } else {
904                    None
905                }
906            }
907            FnArg::Receiver(_) => None, // self doesn't need _ = suppression
908        })
909        .collect();
910
911    let token_type_str = token_type_name.as_deref().unwrap_or("UnknownToken");
912
913    let expanded = if let Some(arch) = target_arch {
914        // Sibling function: #[doc(hidden)] #[target_feature] fn __arcane_fn(...)
915        // Always private — only the wrapper is user-visible.
916        // Safe declaration — Rust 2024 allows safe #[target_feature] functions.
917        let sibling_fn = quote! {
918            #[cfg(target_arch = #arch)]
919            #[doc(hidden)]
920            #(#lint_attrs)*
921            #(#target_feature_attrs)*
922            #inline_attr
923            fn #sibling_name #generics (#sibling_sig_inputs) #output #where_clause {
924                #body
925            }
926        };
927
928        // Wrapper function: fn original_name(...) { unsafe { sibling_call } }
929        // The unsafe block is needed because the sibling has #[target_feature] and
930        // the wrapper doesn't — calling across this boundary requires unsafe.
931        let wrapper_fn = quote! {
932            #[cfg(target_arch = #arch)]
933            #(#attrs)*
934            #[inline(always)]
935            #vis #sig {
936                // SAFETY: The token parameter proves the required CPU features are available.
937                // Calling a #[target_feature] function from a non-matching context requires
938                // unsafe because the CPU may not support those instructions. The token's
939                // existence proves summon() succeeded, so the features are available.
940                unsafe { #sibling_call }
941            }
942        };
943
944        // Optional stub for other architectures
945        let stub = if args.stub {
946            quote! {
947                #[cfg(not(target_arch = #arch))]
948                #(#attrs)*
949                #vis #sig {
950                    let _ = (#(#stub_args),*);
951                    unreachable!(
952                        "BUG: {}() was called but requires {} (target_arch = \"{}\"). \
953                         {}::summon() returns None on this architecture, so this function \
954                         is unreachable in safe code. If you used forge_token_dangerously(), \
955                         that is the bug.",
956                        stringify!(#fn_name),
957                        #token_type_str,
958                        #arch,
959                        #token_type_str,
960                    )
961                }
962            }
963        } else {
964            quote! {}
965        };
966
967        quote! {
968            #sibling_fn
969            #wrapper_fn
970            #stub
971        }
972    } else {
973        // No specific arch (trait bounds or generic) - no cfg guards, no stub needed.
974        // Still use sibling pattern for consistency. Sibling is always private.
975        let sibling_fn = quote! {
976            #[doc(hidden)]
977            #(#lint_attrs)*
978            #(#target_feature_attrs)*
979            #inline_attr
980            fn #sibling_name #generics (#sibling_sig_inputs) #output #where_clause {
981                #body
982            }
983        };
984
985        let wrapper_fn = quote! {
986            #(#attrs)*
987            #[inline(always)]
988            #vis #sig {
989                // SAFETY: The token proves the required CPU features are available.
990                unsafe { #sibling_call }
991            }
992        };
993
994        quote! {
995            #sibling_fn
996            #wrapper_fn
997        }
998    };
999
1000    expanded.into()
1001}
1002
/// Nested inner function expansion (opt-in via `nested` or `_self = Type`).
///
/// This is the original approach: generates a nested inner function inside the
/// original function. Required when `_self = Type` is used because Self must be
/// replaced in the nested function (where it's not in scope).
///
/// Shape of the expansion: the outer function keeps the original signature;
/// a nested `fn __simd_inner_*` carries `#[target_feature]` plus the (possibly
/// Self-rewritten) body and is invoked once through `unsafe`.
fn arcane_impl_nested(
    input_fn: LightFn,
    args: &ArcaneArgs,
    target_arch: Option<&str>,
    token_type_name: Option<String>,
    target_feature_attrs: Vec<Attribute>,
    inline_attr: Attribute,
) -> TokenStream {
    let vis = &input_fn.vis;
    let sig = &input_fn.sig;
    let fn_name = &sig.ident;
    let generics = &sig.generics;
    let where_clause = &generics.where_clause;
    let inputs = &sig.inputs;
    let output = &sig.output;
    let body = &input_fn.body;
    // Filter out user #[inline] attrs to avoid duplicates (will become a hard error).
    let attrs = filter_inline_attrs(&input_fn.attrs);
    // Propagate lint attrs to inner function (same issue as sibling mode — #17)
    let lint_attrs = filter_lint_attrs(&input_fn.attrs);

    // Determine self receiver type if present:
    // `self` => Owned, `&self` => Ref, `&mut self` => RefMut.
    // None when the first input is not a receiver.
    let self_receiver_kind: Option<SelfReceiver> = inputs.first().and_then(|arg| match arg {
        FnArg::Receiver(receiver) => {
            if receiver.reference.is_none() {
                Some(SelfReceiver::Owned)
            } else if receiver.mutability.is_some() {
                Some(SelfReceiver::RefMut)
            } else {
                Some(SelfReceiver::Ref)
            }
        }
        _ => None,
    });

    // Build inner function parameters, transforming self if needed.
    // Also replace Self in non-self parameter types when _self = Type is set,
    // since the inner function is a nested fn where Self from the impl is not in scope.
    // NOTE(review): the unwraps below assume a receiver only occurs together with
    // `_self = Type` — presumably enforced by the caller before choosing nested
    // mode; confirm against arcane_impl.
    let inner_params: Vec<proc_macro2::TokenStream> = inputs
        .iter()
        .map(|arg| match arg {
            FnArg::Receiver(_) => {
                // Transform self receiver to _self parameter
                let self_ty = args.self_type.as_ref().unwrap();
                match self_receiver_kind.as_ref().unwrap() {
                    SelfReceiver::Owned => quote!(_self: #self_ty),
                    SelfReceiver::Ref => quote!(_self: &#self_ty),
                    SelfReceiver::RefMut => quote!(_self: &mut #self_ty),
                }
            }
            FnArg::Typed(pat_type) => {
                if let Some(ref self_ty) = args.self_type {
                    replace_self_in_tokens(quote!(#pat_type), self_ty)
                } else {
                    quote!(#pat_type)
                }
            }
        })
        .collect();

    // Build inner function call arguments. Typed params forward by ident;
    // non-ident patterns (e.g. tuple patterns) are dropped from the call.
    let inner_args: Vec<proc_macro2::TokenStream> = inputs
        .iter()
        .filter_map(|arg| match arg {
            FnArg::Typed(pat_type) => {
                if let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref() {
                    let ident = &pat_ident.ident;
                    Some(quote!(#ident))
                } else {
                    None
                }
            }
            FnArg::Receiver(_) => Some(quote!(self)), // Pass self to inner as _self
        })
        .collect();

    let inner_fn_name = format_ident!("__simd_inner_{}", fn_name);

    // Build turbofish for forwarding type/const generic params to inner function
    let turbofish = build_turbofish(generics);

    // Transform output, body, and where clause to replace Self with concrete type if needed.
    // All three are plain token streams here, not AST nodes — replacement is textual
    // at the token level via replace_self_in_tokens.
    let (inner_output, inner_body, inner_where_clause): (
        proc_macro2::TokenStream,
        proc_macro2::TokenStream,
        proc_macro2::TokenStream,
    ) = if let Some(ref self_ty) = args.self_type {
        let transformed_output = replace_self_in_tokens(output.to_token_stream(), self_ty);
        let transformed_body = replace_self_in_tokens(body.clone(), self_ty);
        let transformed_where = where_clause
            .as_ref()
            .map(|wc| replace_self_in_tokens(wc.to_token_stream(), self_ty))
            .unwrap_or_default();
        (transformed_output, transformed_body, transformed_where)
    } else {
        (
            output.to_token_stream(),
            body.clone(),
            where_clause
                .as_ref()
                .map(|wc| wc.to_token_stream())
                .unwrap_or_default(),
        )
    };

    let token_type_str = token_type_name.as_deref().unwrap_or("UnknownToken");
    let expanded = if let Some(arch) = target_arch {
        let stub = if args.stub {
            quote! {
                // Stub for other architectures - the token cannot be obtained
                #[cfg(not(target_arch = #arch))]
                #(#attrs)*
                #vis #sig {
                    let _ = (#(#inner_args),*);
                    unreachable!(
                        "BUG: {}() was called but requires {} (target_arch = \"{}\"). \
                         {}::summon() returns None on this architecture, so this function \
                         is unreachable in safe code. If you used forge_token_dangerously(), \
                         that is the bug.",
                        stringify!(#fn_name),
                        #token_type_str,
                        #arch,
                        #token_type_str,
                    )
                }
            }
        } else {
            quote! {}
        };

        quote! {
            // Real implementation for the correct architecture
            #[cfg(target_arch = #arch)]
            #(#attrs)*
            #[inline(always)]
            #vis #sig {
                #(#target_feature_attrs)*
                #inline_attr
                #(#lint_attrs)*
                fn #inner_fn_name #generics (#(#inner_params),*) #inner_output #inner_where_clause {
                    #inner_body
                }

                // SAFETY: The token parameter proves the required CPU features are available.
                unsafe { #inner_fn_name #turbofish(#(#inner_args),*) }
            }

            #stub
        }
    } else {
        // No specific arch (trait bounds or generic) - generate without cfg guards
        quote! {
            #(#attrs)*
            #[inline(always)]
            #vis #sig {
                #(#target_feature_attrs)*
                #inline_attr
                #(#lint_attrs)*
                fn #inner_fn_name #generics (#(#inner_params),*) #inner_output #inner_where_clause {
                    #inner_body
                }

                // SAFETY: The token proves the required CPU features are available.
                unsafe { #inner_fn_name #turbofish(#(#inner_args),*) }
            }
        }
    };

    expanded.into()
}
1178
1179/// Mark a function as an arcane SIMD function.
1180///
1181/// This macro generates a safe wrapper around a `#[target_feature]` function.
1182/// The token parameter type determines which CPU features are enabled.
1183///
1184/// # Expansion Modes
1185///
1186/// ## Sibling (default)
1187///
1188/// Generates two functions at the same scope: a safe `#[target_feature]` sibling
1189/// and a safe wrapper. `self`/`Self` work naturally since both functions share scope.
1190/// Compatible with `#![forbid(unsafe_code)]`.
1191///
1192/// ```ignore
1193/// #[arcane]
1194/// fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { /* body */ }
1195/// // Expands to (x86_64 only):
1196/// #[cfg(target_arch = "x86_64")]
1197/// #[doc(hidden)]
1198/// #[target_feature(enable = "avx2,fma,...")]
1199/// fn __arcane_process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { /* body */ }
1200///
1201/// #[cfg(target_arch = "x86_64")]
1202/// fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] {
1203///     unsafe { __arcane_process(token, data) }
1204/// }
1205/// ```
1206///
1207/// Methods work naturally:
1208///
1209/// ```ignore
1210/// impl MyType {
1211///     #[arcane]
1212///     fn compute(&self, token: X64V3Token) -> f32 {
1213///         self.data.iter().sum()  // self/Self just work!
1214///     }
1215/// }
1216/// ```
1217///
1218/// ## Nested (`nested` or `_self = Type`)
1219///
1220/// Generates a nested inner function inside the original. Required for trait impls
1221/// (where sibling functions would fail) and when `_self = Type` is used.
1222///
1223/// ```ignore
1224/// impl SimdOps for MyType {
1225///     #[arcane(_self = MyType)]
1226///     fn compute(&self, token: X64V3Token) -> Self {
1227///         // Use _self instead of self, Self replaced with MyType
1228///         _self.data.iter().sum()
1229///     }
1230/// }
1231/// ```
1232///
1233/// # Cross-Architecture Behavior
1234///
1235/// **Default (cfg-out):** On the wrong architecture, the function is not emitted
1236/// at all — no stub, no dead code. Code that references it must be cfg-gated.
1237///
1238/// **With `stub`:** Generates an `unreachable!()` stub on wrong architectures.
1239/// Use when cross-arch dispatch references the function without cfg guards.
1240///
1241/// ```ignore
1242/// #[arcane(stub)]  // generates stub on wrong arch
1243/// fn process_neon(token: NeonToken, data: &[f32]) -> f32 { ... }
1244/// ```
1245///
1246/// `incant!` is unaffected — it already cfg-gates dispatch calls by architecture.
1247///
1248/// # Token Parameter Forms
1249///
1250/// ```ignore
1251/// // Concrete token
1252/// #[arcane]
1253/// fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { ... }
1254///
1255/// // impl Trait bound
1256/// #[arcane]
1257/// fn process(token: impl HasX64V2, data: &[f32; 8]) -> [f32; 8] { ... }
1258///
1259/// // Generic with inline or where-clause bounds
1260/// #[arcane]
1261/// fn process<T: HasX64V2>(token: T, data: &[f32; 8]) -> [f32; 8] { ... }
1262///
1263/// // Wildcard
1264/// #[arcane]
1265/// fn process(_: X64V3Token, data: &[f32; 8]) -> [f32; 8] { ... }
1266/// ```
1267///
1268/// # Options
1269///
1270/// | Option | Effect |
1271/// |--------|--------|
1272/// | `stub` | Generate `unreachable!()` stub on wrong architecture |
1273/// | `nested` | Use nested inner function instead of sibling |
1274/// | `_self = Type` | Implies `nested`, transforms self receiver, replaces Self |
1275/// | `inline_always` | Use `#[inline(always)]` (requires nightly) |
1276/// | `import_intrinsics` | Auto-import `archmage::intrinsics::{arch}::*` (includes safe memory ops) |
1277/// | `import_magetypes` | Auto-import `magetypes::simd::{ns}::*` and `magetypes::simd::backends::*` |
1278///
1279/// ## Auto-Imports
1280///
1281/// `import_intrinsics` and `import_magetypes` inject `use` statements into the
1282/// function body, eliminating boilerplate. The macro derives the architecture and
1283/// namespace from the token type:
1284///
1285/// ```ignore
1286/// // Without auto-imports — lots of boilerplate:
1287/// use std::arch::x86_64::*;
1288/// use magetypes::simd::v3::*;
1289///
1290/// #[arcane]
1291/// fn process(token: X64V3Token, data: &[f32; 8]) -> f32 {
1292///     let v = f32x8::load(token, data);
1293///     let zero = _mm256_setzero_ps();
1294///     // ...
1295/// }
1296///
1297/// // With auto-imports — clean:
1298/// #[arcane(import_intrinsics, import_magetypes)]
1299/// fn process(token: X64V3Token, data: &[f32; 8]) -> f32 {
1300///     let v = f32x8::load(token, data);
1301///     let zero = _mm256_setzero_ps();
1302///     // ...
1303/// }
1304/// ```
1305///
1306/// The namespace mapping is token-driven:
1307///
1308/// | Token | `import_intrinsics` | `import_magetypes` |
1309/// |-------|--------------------|--------------------|
1310/// | `X64V1..V3Token` | `archmage::intrinsics::x86_64::*` | `magetypes::simd::v3::*` |
1311/// | `X64V4Token` | `archmage::intrinsics::x86_64::*` | `magetypes::simd::v4::*` |
1312/// | `X64V4xToken` | `archmage::intrinsics::x86_64::*` | `magetypes::simd::v4x::*` |
1313/// | `NeonToken` / ARM | `archmage::intrinsics::aarch64::*` | `magetypes::simd::neon::*` |
1314/// | `Wasm128Token` | `archmage::intrinsics::wasm32::*` | `magetypes::simd::wasm128::*` |
1315///
1316/// Works with concrete tokens, `impl Trait` bounds, and generic parameters.
1317///
1318/// # Supported Tokens
1319///
1320/// - **x86_64**: `X64V2Token`, `X64V3Token`/`Desktop64`, `X64V4Token`/`Avx512Token`/`Server64`,
1321///   `X64V4xToken`, `Avx512Fp16Token`, `X64CryptoToken`, `X64V3CryptoToken`
1322/// - **ARM**: `NeonToken`/`Arm64`, `Arm64V2Token`, `Arm64V3Token`,
1323///   `NeonAesToken`, `NeonSha3Token`, `NeonCrcToken`
1324/// - **WASM**: `Wasm128Token`
1325///
1326/// # Supported Trait Bounds
1327///
1328/// `HasX64V2`, `HasX64V4`, `HasNeon`, `HasNeonAes`, `HasNeonSha3`, `HasArm64V2`, `HasArm64V3`
1329///
1330/// ```ignore
1331/// #![feature(target_feature_inline_always)]
1332///
1333/// #[arcane(inline_always)]
1334/// fn fast_kernel(token: Avx2Token, data: &mut [f32]) {
1335///     // Inner function will use #[inline(always)]
1336/// }
1337/// ```
1338#[proc_macro_attribute]
1339pub fn arcane(attr: TokenStream, item: TokenStream) -> TokenStream {
1340    let args = parse_macro_input!(attr as ArcaneArgs);
1341    let input_fn = parse_macro_input!(item as LightFn);
1342    arcane_impl(input_fn, "arcane", args)
1343}
1344
1345/// Legacy alias for [`arcane`].
1346///
1347/// **Deprecated:** Use `#[arcane]` instead. This alias exists only for migration.
1348#[proc_macro_attribute]
1349#[doc(hidden)]
1350pub fn simd_fn(attr: TokenStream, item: TokenStream) -> TokenStream {
1351    let args = parse_macro_input!(attr as ArcaneArgs);
1352    let input_fn = parse_macro_input!(item as LightFn);
1353    arcane_impl(input_fn, "simd_fn", args)
1354}
1355
1356/// Descriptive alias for [`arcane`].
1357///
1358/// Generates a safe wrapper around a `#[target_feature]` inner function.
1359/// The token type in your signature determines which CPU features are enabled.
1360/// Creates an LLVM optimization boundary — use [`token_target_features`]
1361/// (alias for [`rite`]) for inner helpers to avoid this.
1362///
1363/// Since Rust 1.85, value-based SIMD intrinsics are safe inside
1364/// `#[target_feature]` functions. This macro generates the `#[target_feature]`
1365/// wrapper so you never need to write `unsafe` for SIMD code.
1366///
1367/// See [`arcane`] for full documentation and examples.
1368#[proc_macro_attribute]
1369pub fn token_target_features_boundary(attr: TokenStream, item: TokenStream) -> TokenStream {
1370    let args = parse_macro_input!(attr as ArcaneArgs);
1371    let input_fn = parse_macro_input!(item as LightFn);
1372    arcane_impl(input_fn, "token_target_features_boundary", args)
1373}
1374
1375// ============================================================================
1376// Rite macro for inner SIMD functions (inlines into matching #[target_feature] callers)
1377// ============================================================================
1378
1379/// Annotate inner SIMD helpers called from `#[arcane]` functions.
1380///
1381/// Unlike `#[arcane]`, which creates an inner `#[target_feature]` function behind
1382/// a safe boundary, `#[rite]` adds `#[target_feature]` and `#[inline]` directly.
1383/// LLVM inlines it into any caller with matching features — no boundary crossing.
1384///
1385/// # Three Modes
1386///
1387/// **Token-based:** Reads the token type from the function signature.
1388/// ```ignore
1389/// #[rite]
1390/// fn helper(_: X64V3Token, v: __m256) -> __m256 { _mm256_add_ps(v, v) }
1391/// ```
1392///
1393/// **Tier-based:** Specify the tier name directly, no token parameter needed.
1394/// ```ignore
1395/// #[rite(v3)]
1396/// fn helper(v: __m256) -> __m256 { _mm256_add_ps(v, v) }
1397/// ```
1398///
1399/// Both produce identical code. The token form can be easier to remember if
1400/// you already have the token in scope.
1401///
1402/// **Multi-tier:** Specify multiple tiers to generate suffixed variants.
1403/// ```ignore
1404/// #[rite(v3, v4)]
1405/// fn process(data: &[f32; 4]) -> f32 { data.iter().sum() }
1406/// // Generates: process_v3() and process_v4()
1407/// ```
1408///
1409/// Each variant gets its own `#[target_feature]` and `#[cfg(target_arch)]`.
1410/// Since Rust 1.85, calling these from a matching `#[arcane]` or `#[rite]`
1411/// context is safe — no `unsafe` needed when the caller has matching or
1412/// superset features.
1413///
1414/// # Safety
1415///
1416/// `#[rite]` functions can only be safely called from contexts where the
1417/// required CPU features are enabled:
1418/// - From within `#[arcane]` functions with matching/superset tokens
1419/// - From within other `#[rite]` functions with matching/superset tokens
1420/// - From code compiled with `-Ctarget-cpu` that enables the features
1421///
1422/// Calling from other contexts requires `unsafe` and the caller must ensure
1423/// the CPU supports the required features.
1424///
1425/// # Cross-Architecture Behavior
1426///
1427/// Like `#[arcane]`, defaults to cfg-out (no function on wrong arch).
1428/// Use `#[rite(stub)]` to generate an unreachable stub instead.
1429///
1430/// # Options
1431///
1432/// | Option | Effect |
1433/// |--------|--------|
1434/// | tier name(s) | `v3`, `neon`, etc. One = single function; multiple = suffixed variants |
1435/// | `stub` | Generate `unreachable!()` stub on wrong architecture |
1436/// | `import_intrinsics` | Auto-import `archmage::intrinsics::{arch}::*` (includes safe memory ops) |
1437/// | `import_magetypes` | Auto-import `magetypes::simd::{ns}::*` and `magetypes::simd::backends::*` |
1438///
1439/// See `#[arcane]` docs for the full namespace mapping table.
1440///
1441/// # Comparison with #[arcane]
1442///
1443/// | Aspect | `#[arcane]` | `#[rite]` |
1444/// |--------|-------------|-----------|
1445/// | Creates wrapper | Yes | No |
1446/// | Entry point | Yes | No |
1447/// | Inlines into caller | No (barrier) | Yes |
1448/// | Safe to call anywhere | Yes (with token) | Only from feature-enabled context |
1449/// | Multi-tier variants | No | Yes (`#[rite(v3, v4, neon)]`) |
1450/// | `stub` param | Yes | Yes |
1451/// | `import_intrinsics` | Yes | Yes |
1452/// | `import_magetypes` | Yes | Yes |
1453#[proc_macro_attribute]
1454pub fn rite(attr: TokenStream, item: TokenStream) -> TokenStream {
1455    let args = parse_macro_input!(attr as RiteArgs);
1456    let input_fn = parse_macro_input!(item as LightFn);
1457    rite_impl(input_fn, args)
1458}
1459
1460/// Descriptive alias for [`rite`].
1461///
1462/// Applies `#[target_feature]` + `#[inline]` based on the token type in your
1463/// function signature. No wrapper, no optimization boundary. Use for functions
1464/// called from within `#[arcane]`/`#[token_target_features_boundary]` code.
1465///
1466/// Since Rust 1.85, calling a `#[target_feature]` function from another function
1467/// with matching features is safe — no `unsafe` needed.
1468///
1469/// See [`rite`] for full documentation and examples.
1470#[proc_macro_attribute]
1471pub fn token_target_features(attr: TokenStream, item: TokenStream) -> TokenStream {
1472    let args = parse_macro_input!(attr as RiteArgs);
1473    let input_fn = parse_macro_input!(item as LightFn);
1474    rite_impl(input_fn, args)
1475}
1476
/// Arguments for the `#[rite]` macro.
///
/// Built by the `Parse` impl from the attribute's argument list; `Default`
/// yields no flags and no tiers (token-parameter mode).
#[derive(Default)]
struct RiteArgs {
    /// Generate an `unreachable!()` stub on the wrong architecture.
    /// Default is false (cfg-out: no function emitted on wrong arch).
    stub: bool,
    /// Inject `use archmage::intrinsics::{arch}::*;` (includes safe memory ops).
    import_intrinsics: bool,
    /// Inject `use magetypes::simd::{ns}::*;`, `use magetypes::simd::generic::*;`,
    /// and `use magetypes::simd::backends::*;`.
    import_magetypes: bool,
    /// Tiers specified directly (e.g., `#[rite(v3)]` or `#[rite(v3, v4, neon)]`).
    /// Stored as canonical token names (e.g., "X64V3Token").
    /// Single tier: generates one function (no suffix, no token parameter needed).
    /// Multiple tiers: generates suffixed variants (e.g., `fn_v3`, `fn_v4`, `fn_neon`).
    tier_tokens: Vec<String>,
}
1494
1495impl Parse for RiteArgs {
1496    fn parse(input: ParseStream) -> syn::Result<Self> {
1497        let mut args = RiteArgs::default();
1498
1499        while !input.is_empty() {
1500            let ident: Ident = input.parse()?;
1501            match ident.to_string().as_str() {
1502                "stub" => args.stub = true,
1503                "import_intrinsics" => args.import_intrinsics = true,
1504                "import_magetypes" => args.import_magetypes = true,
1505                other => {
1506                    if let Some(canonical) = tier_to_canonical_token(other) {
1507                        args.tier_tokens.push(String::from(canonical));
1508                    } else {
1509                        return Err(syn::Error::new(
1510                            ident.span(),
1511                            format!(
1512                                "unknown rite argument: `{}`. Supported: tier names \
1513                                 (v1, v2, v3, v4, neon, arm_v2, wasm128, ...), \
1514                                 `stub`, `import_intrinsics`, `import_magetypes`.",
1515                                other
1516                            ),
1517                        ));
1518                    }
1519                }
1520            }
1521            if input.peek(Token![,]) {
1522                let _: Token![,] = input.parse()?;
1523            }
1524        }
1525
1526        Ok(args)
1527    }
1528}
1529
1530/// Implementation for the `#[rite]` macro.
1531fn rite_impl(input_fn: LightFn, args: RiteArgs) -> TokenStream {
1532    // Multi-tier mode: generate suffixed variants for each tier
1533    if args.tier_tokens.len() > 1 {
1534        return rite_multi_tier_impl(input_fn, &args);
1535    }
1536
1537    // Single-tier or token-param mode
1538    rite_single_impl(input_fn, args)
1539}
1540
/// Generate a single `#[rite]` function (single tier or token-param mode).
///
/// Feature resolution, in order:
/// - Tier form (`#[rite(v3)]`): features, target arch, and magetypes
///   namespace come from the tier's canonical token; no token parameter
///   is required (a placeholder `_` ident fills the `TokenParamInfo`).
/// - Token-param form: `find_token_param` inspects the signature; on
///   failure a descriptive compile error is produced (with a dedicated
///   diagnosis for featureless trait bounds).
///
/// The function is then annotated with one `#[target_feature(enable = ...)]`
/// per resolved feature plus `#[inline]`, optionally has generated `use`
/// imports prepended to its body, and — when the target arch is known — is
/// wrapped in `#[cfg(target_arch = ...)]` with an optional `unreachable!`
/// stub for other architectures.
fn rite_single_impl(mut input_fn: LightFn, args: RiteArgs) -> TokenStream {
    // Resolve features: either from tier name or from token parameter
    let TokenParamInfo {
        features,
        target_arch,
        token_type_name: _token_type_name,
        magetypes_namespace,
        ..
    } = if let Some(tier_token) = args.tier_tokens.first() {
        // Tier specified directly (e.g., #[rite(v3)]) — no token param needed
        let features = token_to_features(tier_token)
            .expect("tier_to_canonical_token returned invalid token name")
            .to_vec();
        let target_arch = token_to_arch(tier_token);
        let magetypes_namespace = token_to_magetypes_namespace(tier_token);
        TokenParamInfo {
            // Placeholder — tier mode has no actual token parameter ident.
            ident: Ident::new("_", proc_macro2::Span::call_site()),
            features,
            target_arch,
            token_type_name: Some(tier_token.clone()),
            magetypes_namespace,
        }
    } else {
        match find_token_param(&input_fn.sig) {
            Some(result) => result,
            None => {
                // Check for specific misuse: featureless traits like SimdToken
                if let Some(trait_name) = diagnose_featureless_token(&input_fn.sig) {
                    let msg = format!(
                        "`{trait_name}` cannot be used as a token bound in #[rite] \
                         because it doesn't specify any CPU features.\n\
                         \n\
                         #[rite] needs concrete features to generate #[target_feature]. \
                         Use a concrete token, a feature trait, or a tier name:\n\
                         \n\
                         Concrete tokens: X64V3Token, Desktop64, NeonToken, Arm64V2Token, ...\n\
                         Feature traits:  impl HasX64V2, impl HasNeon, impl HasArm64V3, ...\n\
                         Tier names:      #[rite(v3)], #[rite(neon)], #[rite(v4)], ..."
                    );
                    return syn::Error::new_spanned(&input_fn.sig, msg)
                        .to_compile_error()
                        .into();
                }
                let msg = "rite requires a token parameter or a tier name. Supported forms:\n\
                     - Tier name: `#[rite(v3)]`, `#[rite(neon)]`\n\
                     - Multi-tier: `#[rite(v3, v4, neon)]` (generates suffixed variants)\n\
                     - Concrete: `token: X64V3Token`\n\
                     - impl Trait: `token: impl HasX64V2`\n\
                     - Generic: `fn foo<T: HasX64V2>(token: T, ...)`";
                return syn::Error::new_spanned(&input_fn.sig, msg)
                    .to_compile_error()
                    .into();
            }
        }
    };

    // Check: import_intrinsics with AVX-512 features requires the avx512 cargo feature.
    // Check resolved features (not token name) for uniform handling of concrete/trait/generic.
    #[cfg(not(feature = "avx512"))]
    if args.import_intrinsics && features.iter().any(|f| f.starts_with("avx512")) {
        let token_desc = _token_type_name.as_deref().unwrap_or("an AVX-512 token");
        let msg = format!(
            "Using {token_desc} with `import_intrinsics` requires the `avx512` feature.\n\
             \n\
             Add to your Cargo.toml:\n\
             \x20 archmage = {{ version = \"...\", features = [\"avx512\"] }}\n\
             \n\
             Without it, 512-bit safe memory ops (_mm512_loadu_ps etc.) are not available.\n\
             If you only need value intrinsics (no memory ops), remove `import_intrinsics`."
        );
        return syn::Error::new_spanned(&input_fn.sig, msg)
            .to_compile_error()
            .into();
    }

    // Build target_feature attributes
    let target_feature_attrs: Vec<Attribute> = features
        .iter()
        .map(|feature| parse_quote!(#[target_feature(enable = #feature)]))
        .collect();

    // Always use #[inline] - #[inline(always)] + #[target_feature] requires nightly
    let inline_attr: Attribute = parse_quote!(#[inline]);

    // Prepend attributes to the function, filtering user #[inline] to avoid duplicates
    let mut new_attrs = target_feature_attrs;
    new_attrs.push(inline_attr);
    for attr in filter_inline_attrs(&input_fn.attrs) {
        new_attrs.push(attr.clone());
    }
    input_fn.attrs = new_attrs;

    // Prepend import statements to body if requested
    let body_imports = generate_imports(
        target_arch,
        magetypes_namespace,
        args.import_intrinsics,
        args.import_magetypes,
    );
    if !body_imports.is_empty() {
        let original_body = &input_fn.body;
        input_fn.body = quote! {
            #body_imports
            #original_body
        };
    }

    // If we know the target arch, generate cfg-gated impl (+ optional stub)
    if let Some(arch) = target_arch {
        let vis = &input_fn.vis;
        let sig = &input_fn.sig;
        let attrs = &input_fn.attrs;
        let body = &input_fn.body;

        // Optional non-matching-arch stub: same signature, panics if reached.
        let stub = if args.stub {
            quote! {
                #[cfg(not(target_arch = #arch))]
                #vis #sig {
                    unreachable!(concat!(
                        "This function requires ",
                        #arch,
                        " architecture"
                    ))
                }
            }
        } else {
            quote! {}
        };

        quote! {
            #[cfg(target_arch = #arch)]
            #(#attrs)*
            #vis #sig {
                #body
            }

            #stub
        }
        .into()
    } else {
        // No specific arch (trait bounds) - just emit the annotated function
        quote!(#input_fn).into()
    }
}
1686
/// Generate multiple suffixed `#[rite]` variants for multi-tier mode.
///
/// `#[rite(v3, v4, neon)]` on `fn process(...)` generates:
/// - `fn process_v3(...)` with `#[target_feature(enable = "avx2,fma,...")]`
/// - `fn process_v4(...)` with `#[target_feature(enable = "avx512f,...")]`
/// - `fn process_neon(...)` with `#[target_feature(enable = "neon")]`
///
/// Each variant is cfg-gated to its architecture and gets `#[inline]`.
///
/// Per-tier processing mirrors `rite_single_impl` (feature attrs, #[inline],
/// optional imports, cfg guard, optional stub), applied to a renamed clone of
/// the input function.
fn rite_multi_tier_impl(input_fn: LightFn, args: &RiteArgs) -> TokenStream {
    let fn_name = &input_fn.sig.ident;
    let mut variants = proc_macro2::TokenStream::new();

    for tier_token in &args.tier_tokens {
        let features = match token_to_features(tier_token) {
            Some(f) => f,
            None => {
                return syn::Error::new_spanned(
                    &input_fn.sig,
                    format!("unknown token `{tier_token}` in multi-tier #[rite]"),
                )
                .to_compile_error()
                .into();
            }
        };
        let target_arch = token_to_arch(tier_token);
        let magetypes_namespace = token_to_magetypes_namespace(tier_token);

        // Check: import_intrinsics with AVX-512 features requires the avx512 cargo feature.
        #[cfg(not(feature = "avx512"))]
        if args.import_intrinsics && features.iter().any(|f| f.starts_with("avx512")) {
            let msg = format!(
                "Using {tier_token} with `import_intrinsics` requires the `avx512` feature.\n\
                 \n\
                 Add to your Cargo.toml:\n\
                 \x20 archmage = {{ version = \"...\", features = [\"avx512\"] }}\n\
                 \n\
                 Without it, 512-bit safe memory ops (_mm512_loadu_ps etc.) are not available.\n\
                 If you only need value intrinsics (no memory ops), remove `import_intrinsics`."
            );
            return syn::Error::new_spanned(&input_fn.sig, msg)
                .to_compile_error()
                .into();
        }

        let suffix = canonical_token_to_tier_suffix(tier_token)
            .expect("canonical token must have a tier suffix");

        // Build suffixed function name
        let suffixed_ident = format_ident!("{}_{}", fn_name, suffix);

        // Clone and rename the function
        let mut variant_fn = input_fn.clone();
        variant_fn.sig.ident = suffixed_ident;

        // Build target_feature attributes
        let target_feature_attrs: Vec<Attribute> = features
            .iter()
            .map(|feature| parse_quote!(#[target_feature(enable = #feature)]))
            .collect();
        let inline_attr: Attribute = parse_quote!(#[inline]);

        // Generated attrs come first, then the user's (minus any #[inline]
        // they wrote, to avoid duplicates).
        let mut new_attrs = target_feature_attrs;
        new_attrs.push(inline_attr);
        for attr in filter_inline_attrs(&variant_fn.attrs) {
            new_attrs.push(attr.clone());
        }
        variant_fn.attrs = new_attrs;

        // Prepend import statements if requested
        let body_imports = generate_imports(
            target_arch,
            magetypes_namespace,
            args.import_intrinsics,
            args.import_magetypes,
        );
        if !body_imports.is_empty() {
            let original_body = &variant_fn.body;
            variant_fn.body = quote! {
                #body_imports
                #original_body
            };
        }

        // Emit cfg-gated variant
        if let Some(arch) = target_arch {
            let vis = &variant_fn.vis;
            let sig = &variant_fn.sig;
            let attrs = &variant_fn.attrs;
            let body = &variant_fn.body;

            variants.extend(quote! {
                #[cfg(target_arch = #arch)]
                #(#attrs)*
                #vis #sig {
                    #body
                }
            });

            // Optional panicking stub for non-matching architectures.
            if args.stub {
                variants.extend(quote! {
                    #[cfg(not(target_arch = #arch))]
                    #vis #sig {
                        unreachable!(concat!(
                            "This function requires ",
                            #arch,
                            " architecture"
                        ))
                    }
                });
            }
        } else {
            // No specific arch — just emit the annotated function
            variants.extend(quote!(#variant_fn));
        }
    }

    variants.into()
}
1805
1806// =============================================================================
1807// magetypes! macro - generate platform variants from generic function
1808// =============================================================================
1809
1810/// Generate platform-specific variants from a function by replacing `Token`.
1811///
1812/// Use `Token` as a placeholder for the token type. The macro generates
1813/// suffixed variants with `Token` replaced by the concrete token type, and
1814/// each variant wrapped in the appropriate `#[cfg(target_arch = ...)]` guard.
1815///
1816/// # Default tiers
1817///
1818/// Without arguments, generates `_v3`, `_v4`, `_neon`, `_wasm128`, `_scalar`:
1819///
1820/// ```rust,ignore
1821/// #[magetypes]
1822/// fn process(token: Token, data: &[f32]) -> f32 {
1823///     inner_simd_work(token, data)
1824/// }
1825/// ```
1826///
1827/// # Explicit tiers
1828///
1829/// Specify which tiers to generate:
1830///
1831/// ```rust,ignore
1832/// #[magetypes(v1, v3, neon)]
1833/// fn process(token: Token, data: &[f32]) -> f32 {
1834///     inner_simd_work(token, data)
1835/// }
1836/// // Generates: process_v1, process_v3, process_neon, process_scalar
1837/// ```
1838///
1839/// `scalar` is always included implicitly.
1840///
1841/// Known tiers: `v1`, `v2`, `v3`, `v4`, `v4x`, `neon`, `neon_aes`,
1842/// `neon_sha3`, `neon_crc`, `wasm128`, `wasm128_relaxed`, `scalar`.
1843///
1844/// # What gets replaced
1845///
1846/// **Only `Token`** is replaced — with the concrete token type for each variant
1847/// (e.g., `archmage::X64V3Token`, `archmage::ScalarToken`). SIMD types like
1848/// `f32x8` and constants like `LANES` are **not** replaced by this macro.
1849///
1850/// # Usage with incant!
1851///
1852/// The generated variants work with `incant!` for dispatch:
1853///
1854/// ```rust,ignore
1855/// pub fn process_api(data: &[f32]) -> f32 {
1856///     incant!(process(data))
1857/// }
1858///
1859/// // Or with matching explicit tiers:
1860/// pub fn process_api(data: &[f32]) -> f32 {
1861///     incant!(process(data), [v1, v3, neon, scalar])
1862/// }
1863/// ```
1864#[proc_macro_attribute]
1865pub fn magetypes(attr: TokenStream, item: TokenStream) -> TokenStream {
1866    let input_fn = parse_macro_input!(item as LightFn);
1867
1868    // Parse optional tier list from attribute args
1869    let tier_names: Vec<String> = if attr.is_empty() {
1870        DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect()
1871    } else {
1872        let parser = |input: ParseStream| input.parse_terminated(Ident::parse, Token![,]);
1873        let idents = match syn::parse::Parser::parse(parser, attr) {
1874            Ok(p) => p,
1875            Err(e) => return e.to_compile_error().into(),
1876        };
1877        idents.iter().map(|i| i.to_string()).collect()
1878    };
1879
1880    // Skip avx512 tiers when feature is off — _v4 functions likely behind cfg(feature = "avx512")
1881    let tiers = match resolve_tiers(
1882        &tier_names,
1883        input_fn.sig.ident.span(),
1884        cfg!(not(feature = "avx512")),
1885    ) {
1886        Ok(t) => t,
1887        Err(e) => return e.to_compile_error().into(),
1888    };
1889
1890    magetypes_impl(input_fn, &tiers)
1891}
1892
1893fn magetypes_impl(mut input_fn: LightFn, tiers: &[&TierDescriptor]) -> TokenStream {
1894    // Strip user-provided #[arcane] / #[rite] to prevent double-wrapping
1895    // (magetypes auto-adds #[arcane] on non-scalar variants)
1896    input_fn
1897        .attrs
1898        .retain(|attr| !attr.path().is_ident("arcane") && !attr.path().is_ident("rite"));
1899
1900    let fn_name = &input_fn.sig.ident;
1901    let fn_attrs = &input_fn.attrs;
1902
1903    // Convert function to string for text substitution
1904    let fn_str = input_fn.to_token_stream().to_string();
1905
1906    let mut variants = Vec::new();
1907
1908    for tier in tiers {
1909        // Create suffixed function name
1910        let suffixed_name = format!("{}_{}", fn_name, tier.suffix);
1911
1912        // Do text substitution
1913        let mut variant_str = fn_str.clone();
1914
1915        // Replace function name
1916        variant_str = variant_str.replacen(&fn_name.to_string(), &suffixed_name, 1);
1917
1918        // Replace Token type with concrete token
1919        variant_str = variant_str.replace("Token", tier.token_path);
1920
1921        // Parse back to tokens
1922        let variant_tokens: proc_macro2::TokenStream = match variant_str.parse() {
1923            Ok(t) => t,
1924            Err(e) => {
1925                return syn::Error::new_spanned(
1926                    &input_fn,
1927                    format!(
1928                        "Failed to parse generated variant `{}`: {}",
1929                        suffixed_name, e
1930                    ),
1931                )
1932                .to_compile_error()
1933                .into();
1934            }
1935        };
1936
1937        // Add cfg guard (arch only — no cargo feature checks in output)
1938        let cfg_guard = match tier.target_arch {
1939            Some(arch) => quote! { #[cfg(target_arch = #arch)] },
1940            None => quote! {},
1941        };
1942
1943        variants.push(if tier.name != "scalar" {
1944            // Non-scalar variants get #[arcane] so target_feature is applied
1945            quote! {
1946                #cfg_guard
1947                #[archmage::arcane]
1948                #variant_tokens
1949            }
1950        } else {
1951            quote! {
1952                #cfg_guard
1953                #variant_tokens
1954            }
1955        });
1956    }
1957
1958    // Remove attributes from the list that should not be duplicated
1959    let filtered_attrs: Vec<_> = fn_attrs
1960        .iter()
1961        .filter(|a| !a.path().is_ident("magetypes"))
1962        .collect();
1963
1964    let output = quote! {
1965        #(#filtered_attrs)*
1966        #(#variants)*
1967    };
1968
1969    output.into()
1970}
1971
1972// =============================================================================
1973// incant! macro - dispatch to platform-specific variants
1974// =============================================================================
1975
1976// =============================================================================
1977// Tier descriptors for incant! and #[magetypes]
1978// =============================================================================
1979
/// Describes a dispatch tier for incant! and #[magetypes].
///
/// Instances live in the static `ALL_TIERS` table; in every current entry
/// `suffix` equals `name`.
struct TierDescriptor {
    /// Tier name as written in user code (e.g., "v3", "neon")
    name: &'static str,
    /// Function suffix appended to variant names (e.g., "v3", "neon", "scalar")
    suffix: &'static str,
    /// Token type path (e.g., "archmage::X64V3Token")
    token_path: &'static str,
    /// IntoConcreteToken method name (e.g., "as_x64v3")
    as_method: &'static str,
    /// Target architecture for cfg guard (None = no guard, e.g. scalar)
    target_arch: Option<&'static str>,
    /// Dispatch priority (higher = tried first within same arch)
    priority: u32,
}
1995
1996/// All known tiers in dispatch-priority order (highest first within arch).
1997const ALL_TIERS: &[TierDescriptor] = &[
1998    // x86: highest to lowest
1999    TierDescriptor {
2000        name: "v4x",
2001        suffix: "v4x",
2002        token_path: "archmage::X64V4xToken",
2003        as_method: "as_x64v4x",
2004        target_arch: Some("x86_64"),
2005
2006        priority: 50,
2007    },
2008    TierDescriptor {
2009        name: "v4",
2010        suffix: "v4",
2011        token_path: "archmage::X64V4Token",
2012        as_method: "as_x64v4",
2013        target_arch: Some("x86_64"),
2014
2015        priority: 40,
2016    },
2017    TierDescriptor {
2018        name: "v3_crypto",
2019        suffix: "v3_crypto",
2020        token_path: "archmage::X64V3CryptoToken",
2021        as_method: "as_x64v3_crypto",
2022        target_arch: Some("x86_64"),
2023
2024        priority: 35,
2025    },
2026    TierDescriptor {
2027        name: "v3",
2028        suffix: "v3",
2029        token_path: "archmage::X64V3Token",
2030        as_method: "as_x64v3",
2031        target_arch: Some("x86_64"),
2032
2033        priority: 30,
2034    },
2035    TierDescriptor {
2036        name: "x64_crypto",
2037        suffix: "x64_crypto",
2038        token_path: "archmage::X64CryptoToken",
2039        as_method: "as_x64_crypto",
2040        target_arch: Some("x86_64"),
2041
2042        priority: 25,
2043    },
2044    TierDescriptor {
2045        name: "v2",
2046        suffix: "v2",
2047        token_path: "archmage::X64V2Token",
2048        as_method: "as_x64v2",
2049        target_arch: Some("x86_64"),
2050
2051        priority: 20,
2052    },
2053    TierDescriptor {
2054        name: "v1",
2055        suffix: "v1",
2056        token_path: "archmage::X64V1Token",
2057        as_method: "as_x64v1",
2058        target_arch: Some("x86_64"),
2059
2060        priority: 10,
2061    },
2062    // ARM: highest to lowest
2063    TierDescriptor {
2064        name: "arm_v3",
2065        suffix: "arm_v3",
2066        token_path: "archmage::Arm64V3Token",
2067        as_method: "as_arm_v3",
2068        target_arch: Some("aarch64"),
2069
2070        priority: 50,
2071    },
2072    TierDescriptor {
2073        name: "arm_v2",
2074        suffix: "arm_v2",
2075        token_path: "archmage::Arm64V2Token",
2076        as_method: "as_arm_v2",
2077        target_arch: Some("aarch64"),
2078
2079        priority: 40,
2080    },
2081    TierDescriptor {
2082        name: "neon_aes",
2083        suffix: "neon_aes",
2084        token_path: "archmage::NeonAesToken",
2085        as_method: "as_neon_aes",
2086        target_arch: Some("aarch64"),
2087
2088        priority: 30,
2089    },
2090    TierDescriptor {
2091        name: "neon_sha3",
2092        suffix: "neon_sha3",
2093        token_path: "archmage::NeonSha3Token",
2094        as_method: "as_neon_sha3",
2095        target_arch: Some("aarch64"),
2096
2097        priority: 30,
2098    },
2099    TierDescriptor {
2100        name: "neon_crc",
2101        suffix: "neon_crc",
2102        token_path: "archmage::NeonCrcToken",
2103        as_method: "as_neon_crc",
2104        target_arch: Some("aarch64"),
2105
2106        priority: 30,
2107    },
2108    TierDescriptor {
2109        name: "neon",
2110        suffix: "neon",
2111        token_path: "archmage::NeonToken",
2112        as_method: "as_neon",
2113        target_arch: Some("aarch64"),
2114
2115        priority: 20,
2116    },
2117    // WASM
2118    TierDescriptor {
2119        name: "wasm128_relaxed",
2120        suffix: "wasm128_relaxed",
2121        token_path: "archmage::Wasm128RelaxedToken",
2122        as_method: "as_wasm128_relaxed",
2123        target_arch: Some("wasm32"),
2124
2125        priority: 21,
2126    },
2127    TierDescriptor {
2128        name: "wasm128",
2129        suffix: "wasm128",
2130        token_path: "archmage::Wasm128Token",
2131        as_method: "as_wasm128",
2132        target_arch: Some("wasm32"),
2133
2134        priority: 20,
2135    },
2136    // Scalar (always last)
2137    TierDescriptor {
2138        name: "scalar",
2139        suffix: "scalar",
2140        token_path: "archmage::ScalarToken",
2141        as_method: "as_scalar",
2142        target_arch: None,
2143
2144        priority: 0,
2145    },
2146];
2147
/// Default tiers for all dispatch macros. Always includes v4 in the list —
/// `resolve_tiers` with `skip_avx512=true` filters it out when the feature is off.
/// `scalar` is listed explicitly here; `resolve_tiers` would re-append it anyway.
const DEFAULT_TIER_NAMES: &[&str] = &["v4", "v3", "neon", "wasm128", "scalar"];
2151
/// Whether `incant!` requires `scalar` in explicit tier lists.
/// Currently false for backwards compatibility. Flip to true in v1.0.
/// Checked in `incant_impl` before tier resolution.
const REQUIRE_EXPLICIT_SCALAR: bool = false;
2155
/// Look up a tier descriptor by name.
///
/// Returns `None` for unknown names; callers such as `resolve_tiers` turn
/// that into a compile error listing the known tiers.
fn find_tier(name: &str) -> Option<&'static TierDescriptor> {
    ALL_TIERS.iter().find(|t| t.name == name)
}
2160
2161/// Check if a tier's token requires AVX-512 features.
2162///
2163/// Uses the generated `token_to_features` registry to check if the tier's
2164/// canonical token has any feature starting with "avx512".
2165fn tier_requires_avx512(tier: &TierDescriptor) -> bool {
2166    // Extract the token name from the path (e.g., "archmage::X64V4Token" → "X64V4Token")
2167    let token_name = tier
2168        .token_path
2169        .rsplit("::")
2170        .next()
2171        .unwrap_or(tier.token_path);
2172    token_to_features(token_name)
2173        .is_some_and(|features| features.iter().any(|f| f.starts_with("avx512")))
2174}
2175
2176/// Resolve tier names to descriptors, sorted by dispatch priority (highest first).
2177/// Always appends "scalar" if not already present.
2178///
2179/// When `skip_avx512` is true, tiers whose tokens require AVX-512 features are
2180/// silently skipped instead of included. This is used by `incant!` and `#[magetypes]`
2181/// when the `avx512` feature is not enabled — the corresponding `_v4` functions
2182/// likely don't exist (they're behind `#[cfg(feature = "avx512")]`).
2183/// `#[autoversion]` passes `false` since it generates scalar code that doesn't
2184/// need the feature.
2185fn resolve_tiers(
2186    tier_names: &[String],
2187    error_span: proc_macro2::Span,
2188    skip_avx512: bool,
2189) -> syn::Result<Vec<&'static TierDescriptor>> {
2190    let mut tiers = Vec::new();
2191    for name in tier_names {
2192        match find_tier(name) {
2193            Some(tier) => {
2194                if skip_avx512 && tier_requires_avx512(tier) {
2195                    continue; // silently skip — _v4 function likely doesn't exist
2196                }
2197                tiers.push(tier);
2198            }
2199            None => {
2200                let known: Vec<&str> = ALL_TIERS.iter().map(|t| t.name).collect();
2201                return Err(syn::Error::new(
2202                    error_span,
2203                    format!("unknown tier `{}`. Known tiers: {}", name, known.join(", ")),
2204                ));
2205            }
2206        }
2207    }
2208
2209    // Always include scalar fallback
2210    if !tiers.iter().any(|t| t.name == "scalar") {
2211        tiers.push(find_tier("scalar").unwrap());
2212    }
2213
2214    // Sort by priority (highest first) for correct dispatch order
2215    tiers.sort_by(|a, b| b.priority.cmp(&a.priority));
2216
2217    Ok(tiers)
2218}
2219
2220// =============================================================================
2221// incant! macro - dispatch to platform-specific variants
2222// =============================================================================
2223
/// Input for the incant! macro
///
/// Grammar (see the `Parse` impl):
/// `func_path(arg, ...) [with token_expr] [, [tier1, tier2, ...]]`
struct IncantInput {
    /// Function path to call (e.g. `func` or `module::func`)
    func_path: syn::Path,
    /// Arguments to pass
    args: Vec<syn::Expr>,
    /// Optional token variable for passthrough mode
    with_token: Option<syn::Expr>,
    /// Optional explicit tier list (None = default tiers).
    /// The span covers the bracketed list and is used for error reporting.
    tiers: Option<(Vec<String>, proc_macro2::Span)>,
}
2235
2236/// Create a suffixed version of a function path.
2237/// e.g. `module::func` + `"v3"` → `module::func_v3`
2238fn suffix_path(path: &syn::Path, suffix: &str) -> syn::Path {
2239    let mut suffixed = path.clone();
2240    if let Some(last) = suffixed.segments.last_mut() {
2241        last.ident = format_ident!("{}_{}", last.ident, suffix);
2242    }
2243    suffixed
2244}
2245
impl Parse for IncantInput {
    // Parses: function_path(arg1, arg2, ...) [with token_expr] [, [tier1, tier2, ...]]
    fn parse(input: ParseStream) -> syn::Result<Self> {
        // Parse: function_path(arg1, arg2, ...) [with token_expr] [, [tier1, tier2, ...]]
        let func_path: syn::Path = input.parse()?;

        // Parse parenthesized arguments
        let content;
        syn::parenthesized!(content in input);
        let args = content
            .parse_terminated(syn::Expr::parse, Token![,])?
            .into_iter()
            .collect();

        // Check for optional "with token"
        // NOTE(review): any bare identifier here is assumed to be `with`;
        // anything else produces the "expected `with` keyword" error below.
        let with_token = if input.peek(Ident) {
            let kw: Ident = input.parse()?;
            if kw != "with" {
                return Err(syn::Error::new_spanned(kw, "expected `with` keyword"));
            }
            Some(input.parse()?)
        } else {
            None
        };

        // Check for optional tier list: , [tier1, tier2, ...]
        let tiers = if input.peek(Token![,]) {
            let _: Token![,] = input.parse()?;
            let bracket_content;
            let bracket = syn::bracketed!(bracket_content in input);
            let tier_idents = bracket_content.parse_terminated(Ident::parse, Token![,])?;
            let tier_names: Vec<String> = tier_idents.iter().map(|i| i.to_string()).collect();
            // Keep the bracket span so tier errors point at the user's list.
            Some((tier_names, bracket.span.join()))
        } else {
            None
        };

        Ok(IncantInput {
            func_path,
            args,
            with_token,
            tiers,
        })
    }
}
2290
2291/// Dispatch to platform-specific SIMD variants.
2292///
2293/// # Entry Point Mode (no token yet)
2294///
2295/// Summons tokens and dispatches to the best available variant:
2296///
2297/// ```rust,ignore
2298/// pub fn public_api(data: &[f32]) -> f32 {
2299///     incant!(dot(data))
2300/// }
2301/// ```
2302///
2303/// Expands to runtime feature detection + dispatch to `dot_v3`, `dot_v4`,
2304/// `dot_neon`, `dot_wasm128`, or `dot_scalar`.
2305///
2306/// # Explicit Tiers
2307///
2308/// Specify which tiers to dispatch to:
2309///
2310/// ```rust,ignore
2311/// // Only dispatch to v1, v3, neon, and scalar
2312/// pub fn api(data: &[f32]) -> f32 {
2313///     incant!(process(data), [v1, v3, neon, scalar])
2314/// }
2315/// ```
2316///
2317/// Always include `scalar` in explicit tier lists — `incant!` always
2318/// emits a `fn_scalar()` call as the final fallback, and listing it
2319/// documents this dependency. Currently auto-appended if omitted;
2320/// will become a compile error in v1.0. Unknown tier names cause a
2321/// compile error. Tiers are automatically sorted into correct
2322/// dispatch order (highest priority first).
2323///
2324/// Known tiers: `v1`, `v2`, `v3`, `v4`, `v4x`, `neon`, `neon_aes`,
2325/// `neon_sha3`, `neon_crc`, `wasm128`, `wasm128_relaxed`, `scalar`.
2326///
2327/// # Passthrough Mode (already have token)
2328///
2329/// Uses compile-time dispatch via `IntoConcreteToken`:
2330///
2331/// ```rust,ignore
2332/// #[arcane]
2333/// fn outer(token: X64V3Token, data: &[f32]) -> f32 {
2334///     incant!(inner(data) with token)
2335/// }
2336/// ```
2337///
2338/// Also supports explicit tiers:
2339///
2340/// ```rust,ignore
2341/// fn inner<T: IntoConcreteToken>(token: T, data: &[f32]) -> f32 {
2342///     incant!(process(data) with token, [v3, neon, scalar])
2343/// }
2344/// ```
2345///
2346/// The compiler monomorphizes the dispatch, eliminating non-matching branches.
2347///
2348/// # Variant Naming
2349///
2350/// Functions must have suffixed variants matching the selected tiers:
2351/// - `_v1` for `X64V1Token`
2352/// - `_v2` for `X64V2Token`
2353/// - `_v3` for `X64V3Token`
2354/// - `_v4` for `X64V4Token` (requires `avx512` feature)
2355/// - `_v4x` for `X64V4xToken` (requires `avx512` feature)
2356/// - `_neon` for `NeonToken`
2357/// - `_neon_aes` for `NeonAesToken`
2358/// - `_neon_sha3` for `NeonSha3Token`
2359/// - `_neon_crc` for `NeonCrcToken`
2360/// - `_wasm128` for `Wasm128Token`
2361/// - `_scalar` for `ScalarToken`
2362#[proc_macro]
2363pub fn incant(input: TokenStream) -> TokenStream {
2364    let input = parse_macro_input!(input as IncantInput);
2365    incant_impl(input)
2366}
2367
2368/// Legacy alias for [`incant!`].
2369#[proc_macro]
2370pub fn simd_route(input: TokenStream) -> TokenStream {
2371    let input = parse_macro_input!(input as IncantInput);
2372    incant_impl(input)
2373}
2374
2375/// Descriptive alias for [`incant!`].
2376///
2377/// Dispatches to architecture-specific function variants at runtime.
2378/// Looks for suffixed functions (`_v3`, `_v4`, `_neon`, `_wasm128`, `_scalar`)
2379/// and calls the best one the CPU supports.
2380///
2381/// See [`incant!`] for full documentation and examples.
2382#[proc_macro]
2383pub fn dispatch_variant(input: TokenStream) -> TokenStream {
2384    let input = parse_macro_input!(input as IncantInput);
2385    incant_impl(input)
2386}
2387
/// Shared implementation behind `incant!`, `simd_route!`, and `dispatch_variant!`.
///
/// Resolves the tier list (explicit or default), optionally enforces the
/// explicit-scalar rule, then generates either passthrough dispatch (a token
/// expression was supplied via `with`) or entry-point dispatch.
fn incant_impl(input: IncantInput) -> TokenStream {
    let func_path = &input.func_path;
    let args = &input.args;

    // Resolve tiers
    let tier_names: Vec<String> = match &input.tiers {
        Some((names, _)) => names.clone(),
        None => DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect(),
    };
    // Errors point at the explicit tier list when present, otherwise at the
    // name of the function being dispatched.
    let last_segment_span = func_path
        .segments
        .last()
        .map(|s| s.ident.span())
        .unwrap_or_else(proc_macro2::Span::call_site);
    let error_span = input
        .tiers
        .as_ref()
        .map(|(_, span)| *span)
        .unwrap_or(last_segment_span);

    // When the user specifies explicit tiers, require `scalar` in the list.
    // This forces acknowledgment that a scalar fallback path exists and must
    // be implemented. Default tiers (no bracket list) always include scalar.
    // TODO(v1.0): flip REQUIRE_EXPLICIT_SCALAR to true
    if REQUIRE_EXPLICIT_SCALAR
        && let Some((names, span)) = &input.tiers
        && !names.iter().any(|n| n == "scalar")
    {
        return syn::Error::new(
            *span,
            "explicit tier list must include `scalar`. \
             incant! always dispatches to fn_scalar() as the final fallback, \
             so `scalar` must appear in the tier list to acknowledge this. \
             Example: [v3, neon, scalar]",
        )
        .to_compile_error()
        .into();
    }

    // Skip avx512 tiers when feature is off — _v4 functions likely behind cfg(feature = "avx512")
    let tiers = match resolve_tiers(&tier_names, error_span, cfg!(not(feature = "avx512"))) {
        Ok(t) => t,
        Err(e) => return e.to_compile_error().into(),
    };

    // Group tiers by architecture for cfg-guarded blocks
    // Within each arch, tiers are already sorted by priority (highest first)
    if let Some(token_expr) = &input.with_token {
        gen_incant_passthrough(func_path, args, token_expr, &tiers)
    } else {
        gen_incant_entry(func_path, args, &tiers)
    }
}
2441
2442/// Generate incant! passthrough mode (already have a token).
2443fn gen_incant_passthrough(
2444    func_path: &syn::Path,
2445    args: &[syn::Expr],
2446    token_expr: &syn::Expr,
2447    tiers: &[&TierDescriptor],
2448) -> TokenStream {
2449    let mut dispatch_arms = Vec::new();
2450
2451    // Group non-scalar tiers by target_arch for cfg blocks
2452    let mut arch_groups: Vec<(Option<&str>, Vec<&TierDescriptor>)> = Vec::new();
2453    for tier in tiers {
2454        if tier.name == "scalar" {
2455            continue; // Handle scalar separately at the end
2456        }
2457        if let Some(group) = arch_groups.iter_mut().find(|(a, _)| *a == tier.target_arch) {
2458            group.1.push(tier);
2459        } else {
2460            arch_groups.push((tier.target_arch, vec![tier]));
2461        }
2462    }
2463
2464    for (target_arch, group_tiers) in &arch_groups {
2465        let mut tier_checks = Vec::new();
2466        for tier in group_tiers {
2467            let fn_suffixed = suffix_path(func_path, tier.suffix);
2468            let as_method = format_ident!("{}", tier.as_method);
2469            tier_checks.push(quote! {
2470                if let Some(__t) = __incant_token.#as_method() {
2471                    break '__incant #fn_suffixed(__t, #(#args),*);
2472                }
2473            });
2474        }
2475
2476        let inner = quote! { #(#tier_checks)* };
2477
2478        if let Some(arch) = target_arch {
2479            dispatch_arms.push(quote! {
2480                #[cfg(target_arch = #arch)]
2481                { #inner }
2482            });
2483        } else {
2484            dispatch_arms.push(inner);
2485        }
2486    }
2487
2488    // Scalar fallback (always last)
2489    let fn_scalar = suffix_path(func_path, "scalar");
2490    let scalar_arm = if tiers.iter().any(|t| t.name == "scalar") {
2491        quote! {
2492            if let Some(__t) = __incant_token.as_scalar() {
2493                break '__incant #fn_scalar(__t, #(#args),*);
2494            }
2495            unreachable!("Token did not match any known variant")
2496        }
2497    } else {
2498        quote! { unreachable!("Token did not match any known variant") }
2499    };
2500
2501    let expanded = quote! {
2502        '__incant: {
2503            use archmage::IntoConcreteToken;
2504            let __incant_token = #token_expr;
2505            #(#dispatch_arms)*
2506            #scalar_arm
2507        }
2508    };
2509    expanded.into()
2510}
2511
2512/// Generate incant! entry point mode (summon tokens).
2513fn gen_incant_entry(
2514    func_path: &syn::Path,
2515    args: &[syn::Expr],
2516    tiers: &[&TierDescriptor],
2517) -> TokenStream {
2518    let mut dispatch_arms = Vec::new();
2519
2520    // Group non-scalar tiers by target_arch for cfg blocks.
2521    let mut arch_groups: Vec<(Option<&str>, Vec<&TierDescriptor>)> = Vec::new();
2522    for tier in tiers {
2523        if tier.name == "scalar" {
2524            continue;
2525        }
2526        if let Some(group) = arch_groups.iter_mut().find(|(a, _)| *a == tier.target_arch) {
2527            group.1.push(tier);
2528        } else {
2529            arch_groups.push((tier.target_arch, vec![tier]));
2530        }
2531    }
2532
2533    for (target_arch, group_tiers) in &arch_groups {
2534        let mut tier_checks = Vec::new();
2535        for tier in group_tiers {
2536            let fn_suffixed = suffix_path(func_path, tier.suffix);
2537            let token_path: syn::Path = syn::parse_str(tier.token_path).unwrap();
2538
2539            tier_checks.push(quote! {
2540                if let Some(__t) = #token_path::summon() {
2541                    break '__incant #fn_suffixed(__t, #(#args),*);
2542                }
2543            });
2544        }
2545
2546        let inner = quote! { #(#tier_checks)* };
2547
2548        if let Some(arch) = target_arch {
2549            dispatch_arms.push(quote! {
2550                #[cfg(target_arch = #arch)]
2551                { #inner }
2552            });
2553        } else {
2554            dispatch_arms.push(inner);
2555        }
2556    }
2557
2558    // Scalar fallback
2559    let fn_scalar = suffix_path(func_path, "scalar");
2560
2561    let expanded = quote! {
2562        '__incant: {
2563            use archmage::SimdToken;
2564            #(#dispatch_arms)*
2565            #fn_scalar(archmage::ScalarToken, #(#args),*)
2566        }
2567    };
2568    expanded.into()
2569}
2570
2571// =============================================================================
2572// autoversion - combined variant generation + dispatch
2573// =============================================================================
2574
/// Arguments to the `#[autoversion]` macro.
///
/// Holds the parsed attribute arguments, e.g. `#[autoversion(v3, neon)]`
/// or `#[autoversion(_self = MyType)]`.
struct AutoversionArgs {
    /// The concrete type to use for `self` receiver (inherent methods only).
    /// Set via the `_self = Type` argument; forwarded to `#[arcane(_self = ...)]`.
    self_type: Option<Type>,
    /// Explicit tier names (None = default tiers). Names are not validated at
    /// parse time — `resolve_tiers` rejects unknown names later.
    tiers: Option<Vec<String>>,
}
2582
2583impl Parse for AutoversionArgs {
2584    fn parse(input: ParseStream) -> syn::Result<Self> {
2585        let mut self_type = None;
2586        let mut tier_names = Vec::new();
2587
2588        while !input.is_empty() {
2589            let ident: Ident = input.parse()?;
2590            if ident == "_self" {
2591                let _: Token![=] = input.parse()?;
2592                self_type = Some(input.parse()?);
2593            } else {
2594                // Treat as tier name — validated later by resolve_tiers
2595                tier_names.push(ident.to_string());
2596            }
2597            if input.peek(Token![,]) {
2598                let _: Token![,] = input.parse()?;
2599            }
2600        }
2601
2602        Ok(AutoversionArgs {
2603            self_type,
2604            tiers: if tier_names.is_empty() {
2605                None
2606            } else {
2607                Some(tier_names)
2608            },
2609        })
2610    }
2611}
2612
/// Information about the `SimdToken` parameter found in a function signature.
///
/// Produced by [`find_simd_token_param`]; the index is used both to swap the
/// placeholder type for a concrete token type and to drop the parameter from
/// the generated dispatcher's signature.
struct SimdTokenParamInfo {
    /// Index of the parameter in `sig.inputs`
    index: usize,
    /// The parameter identifier
    // Kept for potential diagnostics; currently unused after construction.
    #[allow(dead_code)]
    ident: Ident,
}
2621
2622/// Find the `SimdToken` parameter in a function signature.
2623///
2624/// Searches all typed parameters for one whose type path ends in `SimdToken`.
2625/// Returns the parameter index and identifier, or `None` if not found.
2626fn find_simd_token_param(sig: &Signature) -> Option<SimdTokenParamInfo> {
2627    for (i, arg) in sig.inputs.iter().enumerate() {
2628        if let FnArg::Typed(PatType { pat, ty, .. }) = arg
2629            && let Type::Path(type_path) = ty.as_ref()
2630            && let Some(seg) = type_path.path.segments.last()
2631            && seg.ident == "SimdToken"
2632        {
2633            let ident = match pat.as_ref() {
2634                syn::Pat::Ident(pi) => pi.ident.clone(),
2635                syn::Pat::Wild(w) => Ident::new("__autoversion_token", w.underscore_token.span),
2636                _ => continue,
2637            };
2638            return Some(SimdTokenParamInfo { index: i, ident });
2639        }
2640    }
2641    None
2642}
2643
/// Core implementation for `#[autoversion]`.
///
/// Generates suffixed SIMD variants (like `#[magetypes]`) and a runtime
/// dispatcher function (like `incant!`) from a single annotated function.
///
/// On failure (no `SimdToken` parameter, or an unknown tier name) the error
/// is returned as a compile-error `TokenStream` rather than panicking.
fn autoversion_impl(mut input_fn: LightFn, args: AutoversionArgs) -> TokenStream {
    // Check for self receiver — decides whether dispatch calls use `self.` syntax.
    let has_self = input_fn
        .sig
        .inputs
        .first()
        .is_some_and(|arg| matches!(arg, FnArg::Receiver(_)));

    // _self = Type is only needed for trait impls (nested mode in #[arcane]).
    // For inherent methods, self/Self work naturally in sibling mode.

    // Find SimdToken parameter — without it there is nothing to substitute.
    let token_param = match find_simd_token_param(&input_fn.sig) {
        Some(p) => p,
        None => {
            return syn::Error::new_spanned(
                &input_fn.sig,
                "autoversion requires a `SimdToken` parameter.\n\
                 Example: fn process(token: SimdToken, data: &[f32]) -> f32 { ... }\n\n\
                 SimdToken is the dispatch placeholder — autoversion replaces it \
                 with concrete token types and generates a runtime dispatcher.",
            )
            .to_compile_error()
            .into();
        }
    };

    // Resolve tiers — autoversion always includes v4 in its defaults because it
    // generates scalar code compiled with #[target_feature], not import_intrinsics.
    let tier_names: Vec<String> = match &args.tiers {
        Some(names) => names.clone(),
        None => DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect(),
    };
    // autoversion never skips avx512 — it generates scalar code with #[target_feature]
    let tiers = match resolve_tiers(&tier_names, input_fn.sig.ident.span(), false) {
        Ok(t) => t,
        Err(e) => return e.to_compile_error().into(),
    };

    // Strip #[arcane] / #[rite] to prevent double-wrapping
    input_fn
        .attrs
        .retain(|attr| !attr.path().is_ident("arcane") && !attr.path().is_ident("rite"));

    let fn_name = &input_fn.sig.ident;
    let vis = input_fn.vis.clone();

    // Move attrs to dispatcher only; variants get no user attrs
    let fn_attrs: Vec<Attribute> = input_fn.attrs.drain(..).collect();

    // =========================================================================
    // Generate suffixed variants
    // =========================================================================
    //
    // AST manipulation only — we clone the parsed LightFn and swap the token
    // param's type annotation. No serialize/reparse round-trip. The body is
    // never touched unless _self = Type requires a `let _self = self;`
    // preamble on the scalar variant.

    let mut variants = Vec::new();

    for tier in &tiers {
        let mut variant_fn = input_fn.clone();

        // Variants are always private — only the dispatcher is public.
        variant_fn.vis = syn::Visibility::Inherited;

        // Rename: process → process_v3
        variant_fn.sig.ident = format_ident!("{}_{}", fn_name, tier.suffix);

        // Replace SimdToken param type with concrete token type.
        // NOTE(review): tier.token_path presumably always parses as a path
        // (it comes from the tier table), so this unwrap should never fire.
        let concrete_type: Type = syn::parse_str(tier.token_path).unwrap();
        if let FnArg::Typed(pt) = &mut variant_fn.sig.inputs[token_param.index] {
            *pt.ty = concrete_type;
        }

        // Scalar with _self = Type: inject `let _self = self;` preamble so body's
        // _self references resolve (non-scalar variants get this from #[arcane(_self = Type)])
        if tier.name == "scalar" && has_self && args.self_type.is_some() {
            let original_body = variant_fn.body.clone();
            variant_fn.body = quote!(let _self = self; #original_body);
        }

        // Tiers tied to one architecture are compiled only for that arch.
        let cfg_guard = match tier.target_arch {
            Some(arch) => quote! { #[cfg(target_arch = #arch)] },
            None => quote! {},
        };

        // All variants are private implementation details of the dispatcher.
        // Suppress dead_code: if the dispatcher is unused, rustc warns on IT
        // (via quote_spanned! with the user's span). Warning on individual
        // variants would be confusing — the user didn't write _scalar or _v3.
        if tier.name != "scalar" {
            // Non-scalar variants get #[arcane] so they compile with the
            // tier's #[target_feature] set; _self is forwarded when present.
            let arcane_attr = if let Some(ref self_type) = args.self_type {
                quote! { #[archmage::arcane(_self = #self_type)] }
            } else {
                quote! { #[archmage::arcane] }
            };
            variants.push(quote! {
                #cfg_guard
                #[allow(dead_code)]
                #arcane_attr
                #variant_fn
            });
        } else {
            variants.push(quote! {
                #cfg_guard
                #[allow(dead_code)]
                #variant_fn
            });
        }
    }

    // =========================================================================
    // Generate dispatcher (adapted from gen_incant_entry)
    // =========================================================================

    // Build dispatcher inputs: original params minus SimdToken
    let mut dispatcher_inputs: Vec<FnArg> = input_fn.sig.inputs.iter().cloned().collect();
    dispatcher_inputs.remove(token_param.index);

    // Rename wildcard params so we can pass them as arguments
    let mut wild_counter = 0u32;
    for arg in &mut dispatcher_inputs {
        if let FnArg::Typed(pat_type) = arg
            && matches!(pat_type.pat.as_ref(), syn::Pat::Wild(_))
        {
            let ident = format_ident!("__autoversion_wild_{}", wild_counter);
            wild_counter += 1;
            *pat_type.pat = syn::Pat::Ident(syn::PatIdent {
                attrs: vec![],
                by_ref: None,
                mutability: None,
                ident,
                subpat: None,
            });
        }
    }

    // Collect argument idents for dispatch calls (exclude self receiver).
    // NOTE(review): params with non-identifier patterns (e.g. tuple
    // destructuring) are silently skipped here — confirm callers never use them.
    let dispatch_args: Vec<Ident> = dispatcher_inputs
        .iter()
        .filter_map(|arg| {
            if let FnArg::Typed(PatType { pat, .. }) = arg
                && let syn::Pat::Ident(pi) = pat.as_ref()
            {
                return Some(pi.ident.clone());
            }
            None
        })
        .collect();

    // Build turbofish for forwarding type/const generics to variant calls
    let turbofish = build_turbofish(&input_fn.sig.generics);

    // Group non-scalar tiers by target_arch for cfg blocks
    let mut arch_groups: Vec<(Option<&str>, Vec<&&TierDescriptor>)> = Vec::new();
    for tier in &tiers {
        if tier.name == "scalar" {
            continue;
        }
        if let Some(group) = arch_groups.iter_mut().find(|(a, _)| *a == tier.target_arch) {
            group.1.push(tier);
        } else {
            arch_groups.push((tier.target_arch, vec![tier]));
        }
    }

    let mut dispatch_arms = Vec::new();
    for (target_arch, group_tiers) in &arch_groups {
        let mut tier_checks = Vec::new();
        for tier in group_tiers {
            let suffixed = format_ident!("{}_{}", fn_name, tier.suffix);
            let token_path: syn::Path = syn::parse_str(tier.token_path).unwrap();

            // Method variants dispatch through `self.`; free functions directly.
            let call = if has_self {
                quote! { self.#suffixed #turbofish(__t, #(#dispatch_args),*) }
            } else {
                quote! { #suffixed #turbofish(__t, #(#dispatch_args),*) }
            };

            tier_checks.push(quote! {
                if let Some(__t) = #token_path::summon() {
                    break '__dispatch #call;
                }
            });
        }

        let inner = quote! { #(#tier_checks)* };

        if let Some(arch) = target_arch {
            dispatch_arms.push(quote! {
                #[cfg(target_arch = #arch)]
                { #inner }
            });
        } else {
            dispatch_arms.push(inner);
        }
    }

    // Scalar fallback (always available, no summon needed)
    let scalar_name = format_ident!("{}_scalar", fn_name);
    let scalar_call = if has_self {
        quote! { self.#scalar_name #turbofish(archmage::ScalarToken, #(#dispatch_args),*) }
    } else {
        quote! { #scalar_name #turbofish(archmage::ScalarToken, #(#dispatch_args),*) }
    };

    // Build dispatcher function
    let dispatcher_inputs_punct: syn::punctuated::Punctuated<FnArg, Token![,]> =
        dispatcher_inputs.into_iter().collect();
    let output = &input_fn.sig.output;
    let generics = &input_fn.sig.generics;
    let where_clause = &generics.where_clause;

    // Use the user's span for the dispatcher so dead_code lint fires on the
    // function the user actually wrote, not on invisible generated variants.
    let user_span = fn_name.span();
    let dispatcher = quote_spanned! { user_span =>
        #(#fn_attrs)*
        #vis fn #fn_name #generics (#dispatcher_inputs_punct) #output #where_clause {
            '__dispatch: {
                use archmage::SimdToken;
                #(#dispatch_arms)*
                #scalar_call
            }
        }
    };

    // Dispatcher first, then the private suffixed variants as siblings.
    let expanded = quote! {
        #dispatcher
        #(#variants)*
    };

    expanded.into()
}
2884
2885/// Let the compiler auto-vectorize scalar code for each architecture.
2886///
2887/// Write a plain scalar function with a `SimdToken` placeholder parameter.
2888/// `#[autoversion]` generates architecture-specific copies — each compiled
2889/// with different `#[target_feature]` flags via `#[arcane]` — plus a runtime
2890/// dispatcher that calls the best one the CPU supports.
2891///
2892/// You don't touch intrinsics, don't import SIMD types, don't think about
2893/// lane widths. The compiler's auto-vectorizer does the work; you give it
2894/// permission via `#[target_feature]`, which `#[autoversion]` handles.
2895///
2896/// # The simple win
2897///
2898/// ```rust,ignore
2899/// use archmage::SimdToken;
2900///
2901/// #[autoversion]
2902/// fn sum_of_squares(_token: SimdToken, data: &[f32]) -> f32 {
2903///     let mut sum = 0.0f32;
2904///     for &x in data {
2905///         sum += x * x;
2906///     }
2907///     sum
2908/// }
2909///
2910/// // Call directly — no token, no unsafe:
2911/// let result = sum_of_squares(&my_data);
2912/// ```
2913///
2914/// The `_token` parameter is never used in the body. It exists so the macro
2915/// knows where to substitute concrete token types. Each generated variant
2916/// gets `#[arcane]` → `#[target_feature(enable = "avx2,fma,...")]`, which
2917/// unlocks the compiler's auto-vectorizer for that feature set.
2918///
2919/// On x86-64 with the `_v3` variant (AVX2+FMA), that loop compiles to
2920/// `vfmadd231ps` — fused multiply-add on 8 floats per cycle. On aarch64
2921/// with NEON, you get `fmla`. The `_scalar` fallback compiles without any
2922/// SIMD target features, as a safety net for unknown hardware.
2923///
2924/// # Chunks + remainder
2925///
2926/// The classic data-processing pattern works naturally:
2927///
2928/// ```rust,ignore
2929/// #[autoversion]
2930/// fn normalize(_token: SimdToken, data: &mut [f32], scale: f32) {
2931///     // Compiler auto-vectorizes this — no manual SIMD needed.
2932///     // On v3, this becomes vdivps + vmulps on 8 floats at a time.
2933///     for x in data.iter_mut() {
2934///         *x = (*x - 128.0) * scale;
2935///     }
2936/// }
2937/// ```
2938///
2939/// If you want explicit control over chunk boundaries (e.g., for
2940/// accumulator patterns), that works too:
2941///
2942/// ```rust,ignore
2943/// #[autoversion]
2944/// fn dot_product(_token: SimdToken, a: &[f32], b: &[f32]) -> f32 {
2945///     let n = a.len().min(b.len());
2946///     let mut sum = 0.0f32;
2947///     for i in 0..n {
2948///         sum += a[i] * b[i];
2949///     }
2950///     sum
2951/// }
2952/// ```
2953///
2954/// The compiler decides the chunk size based on the target features of each
2955/// variant (8 floats for AVX2, 4 for NEON, 1 for scalar).
2956///
2957/// # What gets generated
2958///
2959/// With default tiers, `#[autoversion] fn process(_t: SimdToken, data: &[f32]) -> f32`
2960/// expands to:
2961///
2962/// - `process_v4(token: X64V4Token, ...)` — AVX-512 (behind `#[cfg(feature = "avx512")]`)
2963/// - `process_v3(token: X64V3Token, ...)` — AVX2+FMA
2964/// - `process_neon(token: NeonToken, ...)` — aarch64 NEON
2965/// - `process_wasm128(token: Wasm128Token, ...)` — WASM SIMD
2966/// - `process_scalar(token: ScalarToken, ...)` — no SIMD, always available
2967/// - `process(data: &[f32]) -> f32` — **dispatcher** (SimdToken param removed)
2968///
2969/// Each non-scalar variant is wrapped in `#[arcane]` (for `#[target_feature]`)
2970/// and `#[cfg(target_arch = ...)]`. The dispatcher does runtime CPU feature
2971/// detection via `Token::summon()` and calls the best match. When compiled
2972/// with `-C target-cpu=native`, the detection is elided by the compiler.
2973///
2974/// The suffixed variants are private sibling functions — only the dispatcher
2975/// is public. Within the same module, you can call them directly for testing
2976/// or benchmarking.
2977///
2978/// # SimdToken replacement
2979///
2980/// `#[autoversion]` replaces the `SimdToken` type annotation in the function
2981/// signature with the concrete token type for each variant (e.g.,
2982/// `archmage::X64V3Token`). Only the parameter's type changes — the function
2983/// body is never reparsed, which keeps compile times low.
2984///
2985/// The token variable (whatever you named it — `token`, `_token`, `_t`)
2986/// keeps working in the body because its type comes from the signature.
2987/// So `f32x8::from_array(token, ...)` works — `token` is now an `X64V3Token`
2988/// which satisfies the same trait bounds as `SimdToken`.
2989///
2990/// `#[magetypes]` takes a different approach: it replaces the text `Token`
2991/// everywhere in the function — signature and body — via string substitution.
2992/// Use `#[magetypes]` when you need body-level type substitution (e.g.,
2993/// `Token`-dependent constants or type aliases that differ per variant).
2994/// Use `#[autoversion]` when you want compiler auto-vectorization of scalar
2995/// code with zero boilerplate.
2996///
2997/// # Benchmarking
2998///
2999/// Measure the speedup with a side-by-side comparison. The generated
3000/// `_scalar` variant serves as the baseline; the dispatcher picks the
3001/// best available:
3002///
3003/// ```rust,ignore
3004/// use criterion::{Criterion, black_box, criterion_group, criterion_main};
3005/// use archmage::SimdToken;
3006///
3007/// #[autoversion]
3008/// fn sum_squares(_token: SimdToken, data: &[f32]) -> f32 {
3009///     data.iter().map(|&x| x * x).fold(0.0f32, |a, b| a + b)
3010/// }
3011///
3012/// fn bench(c: &mut Criterion) {
3013///     let data: Vec<f32> = (0..4096).map(|i| i as f32 * 0.01).collect();
3014///     let mut group = c.benchmark_group("sum_squares");
3015///
3016///     // Dispatched — picks best available at runtime
3017///     group.bench_function("dispatched", |b| {
3018///         b.iter(|| sum_squares(black_box(&data)))
3019///     });
3020///
3021///     // Scalar baseline — no target_feature, no auto-vectorization
3022///     group.bench_function("scalar", |b| {
3023///         b.iter(|| sum_squares_scalar(archmage::ScalarToken, black_box(&data)))
3024///     });
3025///
3026///     // Specific tier (useful for isolating which tier wins)
3027///     #[cfg(target_arch = "x86_64")]
3028///     if let Some(t) = archmage::X64V3Token::summon() {
3029///         group.bench_function("v3_avx2_fma", |b| {
3030///             b.iter(|| sum_squares_v3(t, black_box(&data)));
3031///         });
3032///     }
3033///
3034///     group.finish();
3035/// }
3036///
3037/// criterion_group!(benches, bench);
3038/// criterion_main!(benches);
3039/// ```
3040///
3041/// For a tight numeric loop on x86-64, the `_v3` variant (AVX2+FMA)
3042/// typically runs 4-8x faster than `_scalar` because `#[target_feature]`
3043/// unlocks auto-vectorization that the baseline build can't use.
3044///
3045/// # Explicit tiers
3046///
3047/// ```rust,ignore
3048/// #[autoversion(v3, v4, v4x, neon, arm_v2, wasm128)]
3049/// fn process(_token: SimdToken, data: &[f32]) -> f32 {
3050///     // ...
3051/// }
3052/// ```
3053///
3054/// `scalar` is always included implicitly.
3055///
3056/// Default tiers (when no list given): `v4`, `v3`, `neon`, `wasm128`, `scalar`.
3057///
3058/// Known tiers: `v1`, `v2`, `v3`, `v3_crypto`, `v4`, `v4x`, `neon`,
3059/// `neon_aes`, `neon_sha3`, `neon_crc`, `arm_v2`, `arm_v3`, `wasm128`,
3060/// `wasm128_relaxed`, `x64_crypto`, `scalar`.
3061///
3062/// # Methods with self receivers
3063///
3064/// For inherent methods, `self` works naturally — no `_self` needed:
3065///
3066/// ```rust,ignore
3067/// impl ImageBuffer {
3068///     #[autoversion]
3069///     fn normalize(&mut self, token: SimdToken, gamma: f32) {
3070///         for pixel in &mut self.data {
3071///             *pixel = (*pixel / 255.0).powf(gamma);
3072///         }
3073///     }
3074/// }
3075///
3076/// // Call normally — no token:
3077/// buffer.normalize(2.2);
3078/// ```
3079///
3080/// All receiver types work: `self`, `&self`, `&mut self`. Non-scalar variants
3081/// get `#[arcane]` (sibling mode), where `self`/`Self` resolve naturally.
3082///
3083/// # Trait methods (requires `_self = Type`)
3084///
3085/// Trait methods can't use `#[autoversion]` directly because proc macro
3086/// attributes on trait impl items can't expand to multiple sibling functions.
3087/// Use the delegation pattern with `_self = Type`:
3088///
3089/// ```rust,ignore
3090/// trait Processor {
3091///     fn process(&self, data: &[f32]) -> f32;
3092/// }
3093///
3094/// impl Processor for MyType {
3095///     fn process(&self, data: &[f32]) -> f32 {
3096///         self.process_impl(data) // delegate to autoversioned method
3097///     }
3098/// }
3099///
3100/// impl MyType {
3101///     #[autoversion(_self = MyType)]
3102///     fn process_impl(&self, token: SimdToken, data: &[f32]) -> f32 {
3103///         _self.weights.iter().zip(data).map(|(w, d)| w * d).sum()
3104///     }
3105/// }
3106/// ```
3107///
3108/// `_self = Type` uses nested mode in `#[arcane]`, which is required for
3109/// trait impls. Use `_self` (not `self`) in the body when using this form.
3110///
3111/// # Comparison with `#[magetypes]` + `incant!`
3112///
3113/// | | `#[autoversion]` | `#[magetypes]` + `incant!` |
3114/// |---|---|---|
3115/// | Placeholder | `SimdToken` | `Token` |
3116/// | Generates variants | Yes | Yes (magetypes) |
3117/// | Generates dispatcher | Yes | No (you write `incant!`) |
3118/// | Best for | Scalar auto-vectorization | Explicit SIMD with typed vectors |
3119/// | Lines of code | 1 attribute | 2+ (magetypes + incant + arcane) |
3120///
3121/// Use `#[autoversion]` for scalar loops you want auto-vectorized. Use
3122/// `#[magetypes]` + `incant!` when you need `f32x8`, `u8x32`, and
/// hand-tuned SIMD code per architecture.
3124#[proc_macro_attribute]
3125pub fn autoversion(attr: TokenStream, item: TokenStream) -> TokenStream {
3126    let args = parse_macro_input!(attr as AutoversionArgs);
3127    let input_fn = parse_macro_input!(item as LightFn);
3128    autoversion_impl(input_fn, args)
3129}
3130
3131// =============================================================================
3132// Unit tests for token/trait recognition maps
3133// =============================================================================
3134
3135#[cfg(test)]
3136mod tests {
3137    use super::*;
3138
3139    use super::generated::{ALL_CONCRETE_TOKENS, ALL_TRAIT_NAMES};
3140    use syn::{ItemFn, ReturnType};
3141
3142    #[test]
3143    fn every_concrete_token_is_in_token_to_features() {
3144        for &name in ALL_CONCRETE_TOKENS {
3145            assert!(
3146                token_to_features(name).is_some(),
3147                "Token `{}` exists in runtime crate but is NOT recognized by \
3148                 token_to_features() in the proc macro. Add it!",
3149                name
3150            );
3151        }
3152    }
3153
3154    #[test]
3155    fn every_trait_is_in_trait_to_features() {
3156        for &name in ALL_TRAIT_NAMES {
3157            assert!(
3158                trait_to_features(name).is_some(),
3159                "Trait `{}` exists in runtime crate but is NOT recognized by \
3160                 trait_to_features() in the proc macro. Add it!",
3161                name
3162            );
3163        }
3164    }
3165
3166    #[test]
3167    fn token_aliases_map_to_same_features() {
3168        // Desktop64 = X64V3Token
3169        assert_eq!(
3170            token_to_features("Desktop64"),
3171            token_to_features("X64V3Token"),
3172            "Desktop64 and X64V3Token should map to identical features"
3173        );
3174
3175        // Server64 = X64V4Token = Avx512Token
3176        assert_eq!(
3177            token_to_features("Server64"),
3178            token_to_features("X64V4Token"),
3179            "Server64 and X64V4Token should map to identical features"
3180        );
3181        assert_eq!(
3182            token_to_features("X64V4Token"),
3183            token_to_features("Avx512Token"),
3184            "X64V4Token and Avx512Token should map to identical features"
3185        );
3186
3187        // Arm64 = NeonToken
3188        assert_eq!(
3189            token_to_features("Arm64"),
3190            token_to_features("NeonToken"),
3191            "Arm64 and NeonToken should map to identical features"
3192        );
3193    }
3194
3195    #[test]
3196    fn trait_to_features_includes_tokens_as_bounds() {
3197        // Tier tokens should also work as trait bounds
3198        // (for `impl X64V3Token` patterns, even though Rust won't allow it,
3199        // the macro processes AST before type checking)
3200        let tier_tokens = [
3201            "X64V2Token",
3202            "X64CryptoToken",
3203            "X64V3Token",
3204            "Desktop64",
3205            "Avx2FmaToken",
3206            "X64V4Token",
3207            "Avx512Token",
3208            "Server64",
3209            "X64V4xToken",
3210            "Avx512Fp16Token",
3211            "NeonToken",
3212            "Arm64",
3213            "NeonAesToken",
3214            "NeonSha3Token",
3215            "NeonCrcToken",
3216            "Arm64V2Token",
3217            "Arm64V3Token",
3218        ];
3219
3220        for &name in &tier_tokens {
3221            assert!(
3222                trait_to_features(name).is_some(),
3223                "Tier token `{}` should also be recognized in trait_to_features() \
3224                 for use as a generic bound. Add it!",
3225                name
3226            );
3227        }
3228    }
3229
3230    #[test]
3231    fn trait_features_are_cumulative() {
3232        // HasX64V4 should include all HasX64V2 features plus more
3233        let v2_features = trait_to_features("HasX64V2").unwrap();
3234        let v4_features = trait_to_features("HasX64V4").unwrap();
3235
3236        for &f in v2_features {
3237            assert!(
3238                v4_features.contains(&f),
3239                "HasX64V4 should include v2 feature `{}` but doesn't",
3240                f
3241            );
3242        }
3243
3244        // v4 should have more features than v2
3245        assert!(
3246            v4_features.len() > v2_features.len(),
3247            "HasX64V4 should have more features than HasX64V2"
3248        );
3249    }
3250
3251    #[test]
3252    fn x64v3_trait_features_include_v2() {
3253        // X64V3Token as trait bound should include v2 features
3254        let v2 = trait_to_features("HasX64V2").unwrap();
3255        let v3 = trait_to_features("X64V3Token").unwrap();
3256
3257        for &f in v2 {
3258            assert!(
3259                v3.contains(&f),
3260                "X64V3Token trait features should include v2 feature `{}` but don't",
3261                f
3262            );
3263        }
3264    }
3265
3266    #[test]
3267    fn has_neon_aes_includes_neon() {
3268        let neon = trait_to_features("HasNeon").unwrap();
3269        let neon_aes = trait_to_features("HasNeonAes").unwrap();
3270
3271        for &f in neon {
3272            assert!(
3273                neon_aes.contains(&f),
3274                "HasNeonAes should include NEON feature `{}`",
3275                f
3276            );
3277        }
3278    }
3279
3280    #[test]
3281    fn no_removed_traits_are_recognized() {
3282        // These traits were removed in 0.3.0 and should NOT be recognized
3283        let removed = [
3284            "HasSse",
3285            "HasSse2",
3286            "HasSse41",
3287            "HasSse42",
3288            "HasAvx",
3289            "HasAvx2",
3290            "HasFma",
3291            "HasAvx512f",
3292            "HasAvx512bw",
3293            "HasAvx512vl",
3294            "HasAvx512vbmi2",
3295            "HasSve",
3296            "HasSve2",
3297        ];
3298
3299        for &name in &removed {
3300            assert!(
3301                trait_to_features(name).is_none(),
3302                "Removed trait `{}` should NOT be in trait_to_features(). \
3303                 It was removed in 0.3.0 — users should migrate to tier traits.",
3304                name
3305            );
3306        }
3307    }
3308
3309    #[test]
3310    fn no_nonexistent_tokens_are_recognized() {
3311        // These tokens don't exist and should NOT be recognized
3312        let fake = [
3313            "SveToken",
3314            "Sve2Token",
3315            "Avx512VnniToken",
3316            "X64V4ModernToken",
3317            "NeonFp16Token",
3318        ];
3319
3320        for &name in &fake {
3321            assert!(
3322                token_to_features(name).is_none(),
3323                "Non-existent token `{}` should NOT be in token_to_features()",
3324                name
3325            );
3326        }
3327    }
3328
3329    #[test]
3330    fn featureless_traits_are_not_in_registries() {
3331        // SimdToken and IntoConcreteToken should NOT be in any feature registry
3332        // because they don't map to CPU features
3333        for &name in FEATURELESS_TRAIT_NAMES {
3334            assert!(
3335                token_to_features(name).is_none(),
3336                "`{}` should NOT be in token_to_features() — it has no CPU features",
3337                name
3338            );
3339            assert!(
3340                trait_to_features(name).is_none(),
3341                "`{}` should NOT be in trait_to_features() — it has no CPU features",
3342                name
3343            );
3344        }
3345    }
3346
3347    #[test]
3348    fn find_featureless_trait_detects_simdtoken() {
3349        let names = vec!["SimdToken".to_string()];
3350        assert_eq!(find_featureless_trait(&names), Some("SimdToken"));
3351
3352        let names = vec!["IntoConcreteToken".to_string()];
3353        assert_eq!(find_featureless_trait(&names), Some("IntoConcreteToken"));
3354
3355        // Feature-bearing traits should NOT be detected
3356        let names = vec!["HasX64V2".to_string()];
3357        assert_eq!(find_featureless_trait(&names), None);
3358
3359        let names = vec!["HasNeon".to_string()];
3360        assert_eq!(find_featureless_trait(&names), None);
3361
3362        // Mixed: if SimdToken is among real traits, still detected
3363        let names = vec!["SimdToken".to_string(), "HasX64V2".to_string()];
3364        assert_eq!(find_featureless_trait(&names), Some("SimdToken"));
3365    }
3366
3367    #[test]
3368    fn arm64_v2_v3_traits_are_cumulative() {
3369        let v2_features = trait_to_features("HasArm64V2").unwrap();
3370        let v3_features = trait_to_features("HasArm64V3").unwrap();
3371
3372        for &f in v2_features {
3373            assert!(
3374                v3_features.contains(&f),
3375                "HasArm64V3 should include v2 feature `{}` but doesn't",
3376                f
3377            );
3378        }
3379
3380        assert!(
3381            v3_features.len() > v2_features.len(),
3382            "HasArm64V3 should have more features than HasArm64V2"
3383        );
3384    }
3385
3386    // =========================================================================
3387    // autoversion — argument parsing
3388    // =========================================================================
3389
3390    #[test]
3391    fn autoversion_args_empty() {
3392        let args: AutoversionArgs = syn::parse_str("").unwrap();
3393        assert!(args.self_type.is_none());
3394        assert!(args.tiers.is_none());
3395    }
3396
3397    #[test]
3398    fn autoversion_args_single_tier() {
3399        let args: AutoversionArgs = syn::parse_str("v3").unwrap();
3400        assert!(args.self_type.is_none());
3401        assert_eq!(args.tiers.as_ref().unwrap(), &["v3"]);
3402    }
3403
3404    #[test]
3405    fn autoversion_args_tiers_only() {
3406        let args: AutoversionArgs = syn::parse_str("v3, v4, neon").unwrap();
3407        assert!(args.self_type.is_none());
3408        let tiers = args.tiers.unwrap();
3409        assert_eq!(tiers, vec!["v3", "v4", "neon"]);
3410    }
3411
3412    #[test]
3413    fn autoversion_args_many_tiers() {
3414        let args: AutoversionArgs =
3415            syn::parse_str("v1, v2, v3, v4, v4x, neon, arm_v2, wasm128").unwrap();
3416        assert_eq!(
3417            args.tiers.unwrap(),
3418            vec!["v1", "v2", "v3", "v4", "v4x", "neon", "arm_v2", "wasm128"]
3419        );
3420    }
3421
3422    #[test]
3423    fn autoversion_args_trailing_comma() {
3424        let args: AutoversionArgs = syn::parse_str("v3, v4,").unwrap();
3425        assert_eq!(args.tiers.as_ref().unwrap(), &["v3", "v4"]);
3426    }
3427
3428    #[test]
3429    fn autoversion_args_self_only() {
3430        let args: AutoversionArgs = syn::parse_str("_self = MyType").unwrap();
3431        assert!(args.self_type.is_some());
3432        assert!(args.tiers.is_none());
3433    }
3434
3435    #[test]
3436    fn autoversion_args_self_and_tiers() {
3437        let args: AutoversionArgs = syn::parse_str("_self = MyType, v3, neon").unwrap();
3438        assert!(args.self_type.is_some());
3439        let tiers = args.tiers.unwrap();
3440        assert_eq!(tiers, vec!["v3", "neon"]);
3441    }
3442
3443    #[test]
3444    fn autoversion_args_tiers_then_self() {
3445        // _self can appear after tier names
3446        let args: AutoversionArgs = syn::parse_str("v3, neon, _self = MyType").unwrap();
3447        assert!(args.self_type.is_some());
3448        let tiers = args.tiers.unwrap();
3449        assert_eq!(tiers, vec!["v3", "neon"]);
3450    }
3451
3452    #[test]
3453    fn autoversion_args_self_with_path_type() {
3454        let args: AutoversionArgs = syn::parse_str("_self = crate::MyType").unwrap();
3455        assert!(args.self_type.is_some());
3456        assert!(args.tiers.is_none());
3457    }
3458
3459    #[test]
3460    fn autoversion_args_self_with_generic_type() {
3461        let args: AutoversionArgs = syn::parse_str("_self = Vec<u8>").unwrap();
3462        assert!(args.self_type.is_some());
3463        let ty_str = args.self_type.unwrap().to_token_stream().to_string();
3464        assert!(ty_str.contains("Vec"), "Expected Vec<u8>, got: {}", ty_str);
3465    }
3466
3467    #[test]
3468    fn autoversion_args_self_trailing_comma() {
3469        let args: AutoversionArgs = syn::parse_str("_self = MyType,").unwrap();
3470        assert!(args.self_type.is_some());
3471        assert!(args.tiers.is_none());
3472    }
3473
3474    // =========================================================================
3475    // autoversion — find_simd_token_param
3476    // =========================================================================
3477
3478    #[test]
3479    fn find_simd_token_param_first_position() {
3480        let f: ItemFn =
3481            syn::parse_str("fn process(token: SimdToken, data: &[f32]) -> f32 {}").unwrap();
3482        let param = find_simd_token_param(&f.sig).unwrap();
3483        assert_eq!(param.index, 0);
3484        assert_eq!(param.ident, "token");
3485    }
3486
3487    #[test]
3488    fn find_simd_token_param_second_position() {
3489        let f: ItemFn =
3490            syn::parse_str("fn process(data: &[f32], token: SimdToken) -> f32 {}").unwrap();
3491        let param = find_simd_token_param(&f.sig).unwrap();
3492        assert_eq!(param.index, 1);
3493        assert_eq!(param.ident, "token");
3494    }
3495
3496    #[test]
3497    fn find_simd_token_param_underscore_prefix() {
3498        let f: ItemFn =
3499            syn::parse_str("fn process(_token: SimdToken, data: &[f32]) -> f32 {}").unwrap();
3500        let param = find_simd_token_param(&f.sig).unwrap();
3501        assert_eq!(param.index, 0);
3502        assert_eq!(param.ident, "_token");
3503    }
3504
3505    #[test]
3506    fn find_simd_token_param_wildcard() {
3507        let f: ItemFn = syn::parse_str("fn process(_: SimdToken, data: &[f32]) -> f32 {}").unwrap();
3508        let param = find_simd_token_param(&f.sig).unwrap();
3509        assert_eq!(param.index, 0);
3510        assert_eq!(param.ident, "__autoversion_token");
3511    }
3512
3513    #[test]
3514    fn find_simd_token_param_not_found() {
3515        let f: ItemFn = syn::parse_str("fn process(data: &[f32]) -> f32 {}").unwrap();
3516        assert!(find_simd_token_param(&f.sig).is_none());
3517    }
3518
3519    #[test]
3520    fn find_simd_token_param_no_params() {
3521        let f: ItemFn = syn::parse_str("fn process() {}").unwrap();
3522        assert!(find_simd_token_param(&f.sig).is_none());
3523    }
3524
3525    #[test]
3526    fn find_simd_token_param_concrete_token_not_matched() {
3527        // autoversion looks specifically for SimdToken, not concrete tokens
3528        let f: ItemFn =
3529            syn::parse_str("fn process(token: X64V3Token, data: &[f32]) -> f32 {}").unwrap();
3530        assert!(find_simd_token_param(&f.sig).is_none());
3531    }
3532
3533    #[test]
3534    fn find_simd_token_param_scalar_token_not_matched() {
3535        let f: ItemFn =
3536            syn::parse_str("fn process(token: ScalarToken, data: &[f32]) -> f32 {}").unwrap();
3537        assert!(find_simd_token_param(&f.sig).is_none());
3538    }
3539
3540    #[test]
3541    fn find_simd_token_param_among_many() {
3542        let f: ItemFn = syn::parse_str(
3543            "fn process(a: i32, b: f64, token: SimdToken, c: &str, d: bool) -> f32 {}",
3544        )
3545        .unwrap();
3546        let param = find_simd_token_param(&f.sig).unwrap();
3547        assert_eq!(param.index, 2);
3548        assert_eq!(param.ident, "token");
3549    }
3550
3551    #[test]
3552    fn find_simd_token_param_with_generics() {
3553        let f: ItemFn =
3554            syn::parse_str("fn process<T: Clone>(token: SimdToken, data: &[T]) -> T {}").unwrap();
3555        let param = find_simd_token_param(&f.sig).unwrap();
3556        assert_eq!(param.index, 0);
3557        assert_eq!(param.ident, "token");
3558    }
3559
3560    #[test]
3561    fn find_simd_token_param_with_where_clause() {
3562        let f: ItemFn = syn::parse_str(
3563            "fn process<T>(token: SimdToken, data: &[T]) -> T where T: Copy + Default {}",
3564        )
3565        .unwrap();
3566        let param = find_simd_token_param(&f.sig).unwrap();
3567        assert_eq!(param.index, 0);
3568    }
3569
3570    #[test]
3571    fn find_simd_token_param_with_lifetime() {
3572        let f: ItemFn =
3573            syn::parse_str("fn process<'a>(token: SimdToken, data: &'a [f32]) -> &'a f32 {}")
3574                .unwrap();
3575        let param = find_simd_token_param(&f.sig).unwrap();
3576        assert_eq!(param.index, 0);
3577    }
3578
3579    // =========================================================================
3580    // autoversion — tier resolution
3581    // =========================================================================
3582
3583    #[test]
3584    fn autoversion_default_tiers_all_resolve() {
3585        let names: Vec<String> = DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect();
3586        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site(), false).unwrap();
3587        assert!(!tiers.is_empty());
3588        // scalar should be present
3589        assert!(tiers.iter().any(|t| t.name == "scalar"));
3590    }
3591
3592    #[test]
3593    fn autoversion_scalar_always_appended() {
3594        let names = vec!["v3".to_string(), "neon".to_string()];
3595        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site(), false).unwrap();
3596        assert!(
3597            tiers.iter().any(|t| t.name == "scalar"),
3598            "scalar must be auto-appended"
3599        );
3600    }
3601
3602    #[test]
3603    fn autoversion_scalar_not_duplicated() {
3604        let names = vec!["v3".to_string(), "scalar".to_string()];
3605        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site(), false).unwrap();
3606        let scalar_count = tiers.iter().filter(|t| t.name == "scalar").count();
3607        assert_eq!(scalar_count, 1, "scalar must not be duplicated");
3608    }
3609
3610    #[test]
3611    fn autoversion_tiers_sorted_by_priority() {
3612        let names = vec!["neon".to_string(), "v4".to_string(), "v3".to_string()];
3613        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site(), false).unwrap();
3614        // v4 (priority 40) > v3 (30) > neon (20) > scalar (0)
3615        let priorities: Vec<u32> = tiers.iter().map(|t| t.priority).collect();
3616        for window in priorities.windows(2) {
3617            assert!(
3618                window[0] >= window[1],
3619                "Tiers not sorted by priority: {:?}",
3620                priorities
3621            );
3622        }
3623    }
3624
3625    #[test]
3626    fn autoversion_unknown_tier_errors() {
3627        let names = vec!["v3".to_string(), "avx9000".to_string()];
3628        let result = resolve_tiers(&names, proc_macro2::Span::call_site(), false);
3629        match result {
3630            Ok(_) => panic!("Expected error for unknown tier 'avx9000'"),
3631            Err(e) => {
3632                let err_msg = e.to_string();
3633                assert!(
3634                    err_msg.contains("avx9000"),
3635                    "Error should mention unknown tier: {}",
3636                    err_msg
3637                );
3638            }
3639        }
3640    }
3641
3642    #[test]
3643    fn autoversion_all_known_tiers_resolve() {
3644        // Every tier in ALL_TIERS should be findable
3645        for tier in ALL_TIERS {
3646            assert!(
3647                find_tier(tier.name).is_some(),
3648                "Tier '{}' should be findable by name",
3649                tier.name
3650            );
3651        }
3652    }
3653
3654    #[test]
3655    fn autoversion_default_tier_list_is_sensible() {
3656        // Defaults should cover x86, ARM, WASM, and scalar
3657        let names: Vec<String> = DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect();
3658        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site(), false).unwrap();
3659
3660        let has_x86 = tiers.iter().any(|t| t.target_arch == Some("x86_64"));
3661        let has_arm = tiers.iter().any(|t| t.target_arch == Some("aarch64"));
3662        let has_wasm = tiers.iter().any(|t| t.target_arch == Some("wasm32"));
3663        let has_scalar = tiers.iter().any(|t| t.name == "scalar");
3664
3665        assert!(has_x86, "Default tiers should include an x86_64 tier");
3666        assert!(has_arm, "Default tiers should include an aarch64 tier");
3667        assert!(has_wasm, "Default tiers should include a wasm32 tier");
3668        assert!(has_scalar, "Default tiers should include scalar");
3669    }
3670
3671    // =========================================================================
3672    // autoversion — variant replacement (AST manipulation)
3673    // =========================================================================
3674
3675    /// Mirrors what `autoversion_impl` does for a single variant: parse an
3676    /// ItemFn (for test convenience), rename it, swap the SimdToken param
3677    /// type, optionally inject the `_self` preamble for scalar+self.
3678    fn do_variant_replacement(func: &str, tier_name: &str, has_self: bool) -> ItemFn {
3679        let mut f: ItemFn = syn::parse_str(func).unwrap();
3680        let fn_name = f.sig.ident.to_string();
3681
3682        let tier = find_tier(tier_name).unwrap();
3683
3684        // Rename
3685        f.sig.ident = format_ident!("{}_{}", fn_name, tier.suffix);
3686
3687        // Find and replace SimdToken param type
3688        let token_idx = find_simd_token_param(&f.sig)
3689            .unwrap_or_else(|| panic!("No SimdToken param in: {}", func))
3690            .index;
3691        let concrete_type: Type = syn::parse_str(tier.token_path).unwrap();
3692        if let FnArg::Typed(pt) = &mut f.sig.inputs[token_idx] {
3693            *pt.ty = concrete_type;
3694        }
3695
3696        // Scalar + self: inject preamble
3697        if tier_name == "scalar" && has_self {
3698            let preamble: syn::Stmt = syn::parse_quote!(let _self = self;);
3699            f.block.stmts.insert(0, preamble);
3700        }
3701
3702        f
3703    }
3704
3705    #[test]
3706    fn variant_replacement_v3_renames_function() {
3707        let f = do_variant_replacement(
3708            "fn process(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3709            "v3",
3710            false,
3711        );
3712        assert_eq!(f.sig.ident, "process_v3");
3713    }
3714
3715    #[test]
3716    fn variant_replacement_v3_replaces_token_type() {
3717        let f = do_variant_replacement(
3718            "fn process(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3719            "v3",
3720            false,
3721        );
3722        let first_param_ty = match &f.sig.inputs[0] {
3723            FnArg::Typed(pt) => pt.ty.to_token_stream().to_string(),
3724            _ => panic!("Expected typed param"),
3725        };
3726        assert!(
3727            first_param_ty.contains("X64V3Token"),
3728            "Expected X64V3Token, got: {}",
3729            first_param_ty
3730        );
3731    }
3732
3733    #[test]
3734    fn variant_replacement_neon_produces_valid_fn() {
3735        let f = do_variant_replacement(
3736            "fn compute(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3737            "neon",
3738            false,
3739        );
3740        assert_eq!(f.sig.ident, "compute_neon");
3741        let first_param_ty = match &f.sig.inputs[0] {
3742            FnArg::Typed(pt) => pt.ty.to_token_stream().to_string(),
3743            _ => panic!("Expected typed param"),
3744        };
3745        assert!(
3746            first_param_ty.contains("NeonToken"),
3747            "Expected NeonToken, got: {}",
3748            first_param_ty
3749        );
3750    }
3751
3752    #[test]
3753    fn variant_replacement_wasm128_produces_valid_fn() {
3754        let f = do_variant_replacement(
3755            "fn compute(_t: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3756            "wasm128",
3757            false,
3758        );
3759        assert_eq!(f.sig.ident, "compute_wasm128");
3760    }
3761
3762    #[test]
3763    fn variant_replacement_scalar_produces_valid_fn() {
3764        let f = do_variant_replacement(
3765            "fn compute(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3766            "scalar",
3767            false,
3768        );
3769        assert_eq!(f.sig.ident, "compute_scalar");
3770        let first_param_ty = match &f.sig.inputs[0] {
3771            FnArg::Typed(pt) => pt.ty.to_token_stream().to_string(),
3772            _ => panic!("Expected typed param"),
3773        };
3774        assert!(
3775            first_param_ty.contains("ScalarToken"),
3776            "Expected ScalarToken, got: {}",
3777            first_param_ty
3778        );
3779    }
3780
3781    #[test]
3782    fn variant_replacement_v4_produces_valid_fn() {
3783        let f = do_variant_replacement(
3784            "fn transform(token: SimdToken, data: &mut [f32]) { }",
3785            "v4",
3786            false,
3787        );
3788        assert_eq!(f.sig.ident, "transform_v4");
3789        let first_param_ty = match &f.sig.inputs[0] {
3790            FnArg::Typed(pt) => pt.ty.to_token_stream().to_string(),
3791            _ => panic!("Expected typed param"),
3792        };
3793        assert!(
3794            first_param_ty.contains("X64V4Token"),
3795            "Expected X64V4Token, got: {}",
3796            first_param_ty
3797        );
3798    }
3799
3800    #[test]
3801    fn variant_replacement_v4x_produces_valid_fn() {
3802        let f = do_variant_replacement(
3803            "fn transform(token: SimdToken, data: &mut [f32]) { }",
3804            "v4x",
3805            false,
3806        );
3807        assert_eq!(f.sig.ident, "transform_v4x");
3808    }
3809
3810    #[test]
3811    fn variant_replacement_arm_v2_produces_valid_fn() {
3812        let f = do_variant_replacement(
3813            "fn transform(token: SimdToken, data: &mut [f32]) { }",
3814            "arm_v2",
3815            false,
3816        );
3817        assert_eq!(f.sig.ident, "transform_arm_v2");
3818    }
3819
3820    #[test]
3821    fn variant_replacement_preserves_generics() {
3822        let f = do_variant_replacement(
3823            "fn process<T: Copy + Default>(token: SimdToken, data: &[T]) -> T { T::default() }",
3824            "v3",
3825            false,
3826        );
3827        assert_eq!(f.sig.ident, "process_v3");
3828        // Generic params should still be present
3829        assert!(
3830            !f.sig.generics.params.is_empty(),
3831            "Generics should be preserved"
3832        );
3833    }
3834
3835    #[test]
3836    fn variant_replacement_preserves_where_clause() {
3837        let f = do_variant_replacement(
3838            "fn process<T>(token: SimdToken, data: &[T]) -> T where T: Copy + Default { T::default() }",
3839            "v3",
3840            false,
3841        );
3842        assert!(
3843            f.sig.generics.where_clause.is_some(),
3844            "Where clause should be preserved"
3845        );
3846    }
3847
3848    #[test]
3849    fn variant_replacement_preserves_return_type() {
3850        let f = do_variant_replacement(
3851            "fn process(token: SimdToken, data: &[f32]) -> Vec<f32> { vec![] }",
3852            "neon",
3853            false,
3854        );
3855        let ret = f.sig.output.to_token_stream().to_string();
3856        assert!(
3857            ret.contains("Vec"),
3858            "Return type should be preserved, got: {}",
3859            ret
3860        );
3861    }
3862
3863    #[test]
3864    fn variant_replacement_preserves_multiple_params() {
3865        let f = do_variant_replacement(
3866            "fn process(token: SimdToken, a: &[f32], b: &[f32], scale: f32) -> f32 { 0.0 }",
3867            "v3",
3868            false,
3869        );
3870        // SimdToken → X64V3Token, plus the 3 other params
3871        assert_eq!(f.sig.inputs.len(), 4);
3872    }
3873
3874    #[test]
3875    fn variant_replacement_preserves_no_return_type() {
3876        let f = do_variant_replacement(
3877            "fn transform(token: SimdToken, data: &mut [f32]) { }",
3878            "v3",
3879            false,
3880        );
3881        assert!(
3882            matches!(f.sig.output, ReturnType::Default),
3883            "No return type should remain as Default"
3884        );
3885    }
3886
3887    #[test]
3888    fn variant_replacement_preserves_lifetime_params() {
3889        let f = do_variant_replacement(
3890            "fn process<'a>(token: SimdToken, data: &'a [f32]) -> &'a [f32] { data }",
3891            "v3",
3892            false,
3893        );
3894        assert!(!f.sig.generics.params.is_empty());
3895    }
3896
3897    #[test]
3898    fn variant_replacement_scalar_self_injects_preamble() {
3899        let f = do_variant_replacement(
3900            "fn method(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3901            "scalar",
3902            true, // has_self
3903        );
3904        assert_eq!(f.sig.ident, "method_scalar");
3905
3906        // First statement should be `let _self = self;`
3907        let body_str = f.block.to_token_stream().to_string();
3908        assert!(
3909            body_str.contains("let _self = self"),
3910            "Scalar+self variant should have _self preamble, got: {}",
3911            body_str
3912        );
3913    }
3914
3915    #[test]
3916    fn variant_replacement_all_default_tiers_produce_valid_fns() {
3917        let names: Vec<String> = DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect();
3918        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site(), false).unwrap();
3919
3920        for tier in &tiers {
3921            let f = do_variant_replacement(
3922                "fn process(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3923                tier.name,
3924                false,
3925            );
3926            let expected_name = format!("process_{}", tier.suffix);
3927            assert_eq!(
3928                f.sig.ident.to_string(),
3929                expected_name,
3930                "Tier '{}' should produce function '{}'",
3931                tier.name,
3932                expected_name
3933            );
3934        }
3935    }
3936
3937    #[test]
3938    fn variant_replacement_all_known_tiers_produce_valid_fns() {
3939        for tier in ALL_TIERS {
3940            let f = do_variant_replacement(
3941                "fn compute(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3942                tier.name,
3943                false,
3944            );
3945            let expected_name = format!("compute_{}", tier.suffix);
3946            assert_eq!(
3947                f.sig.ident.to_string(),
3948                expected_name,
3949                "Tier '{}' should produce function '{}'",
3950                tier.name,
3951                expected_name
3952            );
3953        }
3954    }
3955
3956    #[test]
3957    fn variant_replacement_no_simdtoken_remains() {
3958        for tier in ALL_TIERS {
3959            let f = do_variant_replacement(
3960                "fn compute(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3961                tier.name,
3962                false,
3963            );
3964            let full_str = f.to_token_stream().to_string();
3965            assert!(
3966                !full_str.contains("SimdToken"),
3967                "Tier '{}' variant still contains 'SimdToken': {}",
3968                tier.name,
3969                full_str
3970            );
3971        }
3972    }
3973
3974    // =========================================================================
3975    // autoversion — cfg guard and tier descriptor properties
3976    // =========================================================================
3977
3978    #[test]
3979    fn tier_v3_targets_x86_64() {
3980        let tier = find_tier("v3").unwrap();
3981        assert_eq!(tier.target_arch, Some("x86_64"));
3982    }
3983
3984    #[test]
3985    fn tier_v4_targets_x86_64() {
3986        let tier = find_tier("v4").unwrap();
3987        assert_eq!(tier.target_arch, Some("x86_64"));
3988    }
3989
3990    #[test]
3991    fn tier_v4x_targets_x86_64() {
3992        let tier = find_tier("v4x").unwrap();
3993        assert_eq!(tier.target_arch, Some("x86_64"));
3994    }
3995
3996    #[test]
3997    fn tier_neon_targets_aarch64() {
3998        let tier = find_tier("neon").unwrap();
3999        assert_eq!(tier.target_arch, Some("aarch64"));
4000    }
4001
4002    #[test]
4003    fn tier_wasm128_targets_wasm32() {
4004        let tier = find_tier("wasm128").unwrap();
4005        assert_eq!(tier.target_arch, Some("wasm32"));
4006    }
4007
4008    #[test]
4009    fn tier_scalar_has_no_guards() {
4010        let tier = find_tier("scalar").unwrap();
4011        assert_eq!(tier.target_arch, None);
4012        assert_eq!(tier.priority, 0);
4013    }
4014
4015    #[test]
4016    fn tier_priorities_are_consistent() {
4017        // Higher-capability tiers within the same arch should have higher priority
4018        let v2 = find_tier("v2").unwrap();
4019        let v3 = find_tier("v3").unwrap();
4020        let v4 = find_tier("v4").unwrap();
4021        assert!(v4.priority > v3.priority);
4022        assert!(v3.priority > v2.priority);
4023
4024        let neon = find_tier("neon").unwrap();
4025        let arm_v2 = find_tier("arm_v2").unwrap();
4026        let arm_v3 = find_tier("arm_v3").unwrap();
4027        assert!(arm_v3.priority > arm_v2.priority);
4028        assert!(arm_v2.priority > neon.priority);
4029
4030        // scalar is lowest
4031        let scalar = find_tier("scalar").unwrap();
4032        assert!(neon.priority > scalar.priority);
4033        assert!(v2.priority > scalar.priority);
4034    }
4035
4036    // =========================================================================
4037    // autoversion — dispatcher structure
4038    // =========================================================================
4039
4040    #[test]
4041    fn dispatcher_param_removal_free_fn() {
4042        // Simulate what autoversion_impl does: remove the SimdToken param
4043        let f: ItemFn =
4044            syn::parse_str("fn process(token: SimdToken, data: &[f32], scale: f32) -> f32 { 0.0 }")
4045                .unwrap();
4046
4047        let token_param = find_simd_token_param(&f.sig).unwrap();
4048        let mut dispatcher_inputs: Vec<FnArg> = f.sig.inputs.iter().cloned().collect();
4049        dispatcher_inputs.remove(token_param.index);
4050
4051        // Should have 2 params remaining: data, scale
4052        assert_eq!(dispatcher_inputs.len(), 2);
4053
4054        // Neither should be SimdToken
4055        for arg in &dispatcher_inputs {
4056            if let FnArg::Typed(pt) = arg {
4057                let ty_str = pt.ty.to_token_stream().to_string();
4058                assert!(
4059                    !ty_str.contains("SimdToken"),
4060                    "SimdToken should be removed from dispatcher, found: {}",
4061                    ty_str
4062                );
4063            }
4064        }
4065    }
4066
4067    #[test]
4068    fn dispatcher_param_removal_token_only() {
4069        let f: ItemFn = syn::parse_str("fn process(token: SimdToken) -> f32 { 0.0 }").unwrap();
4070
4071        let token_param = find_simd_token_param(&f.sig).unwrap();
4072        let mut dispatcher_inputs: Vec<FnArg> = f.sig.inputs.iter().cloned().collect();
4073        dispatcher_inputs.remove(token_param.index);
4074
4075        // No params left — dispatcher takes no arguments
4076        assert_eq!(dispatcher_inputs.len(), 0);
4077    }
4078
4079    #[test]
4080    fn dispatcher_param_removal_token_last() {
4081        let f: ItemFn =
4082            syn::parse_str("fn process(data: &[f32], scale: f32, token: SimdToken) -> f32 { 0.0 }")
4083                .unwrap();
4084
4085        let token_param = find_simd_token_param(&f.sig).unwrap();
4086        assert_eq!(token_param.index, 2);
4087
4088        let mut dispatcher_inputs: Vec<FnArg> = f.sig.inputs.iter().cloned().collect();
4089        dispatcher_inputs.remove(token_param.index);
4090
4091        assert_eq!(dispatcher_inputs.len(), 2);
4092    }
4093
4094    #[test]
4095    fn dispatcher_dispatch_args_extraction() {
4096        // Test that we correctly extract idents for the dispatch call
4097        let f: ItemFn =
4098            syn::parse_str("fn process(data: &[f32], scale: f32) -> f32 { 0.0 }").unwrap();
4099
4100        let dispatch_args: Vec<String> = f
4101            .sig
4102            .inputs
4103            .iter()
4104            .filter_map(|arg| {
4105                if let FnArg::Typed(PatType { pat, .. }) = arg {
4106                    if let syn::Pat::Ident(pi) = pat.as_ref() {
4107                        return Some(pi.ident.to_string());
4108                    }
4109                }
4110                None
4111            })
4112            .collect();
4113
4114        assert_eq!(dispatch_args, vec!["data", "scale"]);
4115    }
4116
4117    #[test]
4118    fn dispatcher_wildcard_params_get_renamed() {
4119        let f: ItemFn = syn::parse_str("fn process(_: &[f32], _: f32) -> f32 { 0.0 }").unwrap();
4120
4121        let mut dispatcher_inputs: Vec<FnArg> = f.sig.inputs.iter().cloned().collect();
4122
4123        let mut wild_counter = 0u32;
4124        for arg in &mut dispatcher_inputs {
4125            if let FnArg::Typed(pat_type) = arg {
4126                if matches!(pat_type.pat.as_ref(), syn::Pat::Wild(_)) {
4127                    let ident = format_ident!("__autoversion_wild_{}", wild_counter);
4128                    wild_counter += 1;
4129                    *pat_type.pat = syn::Pat::Ident(syn::PatIdent {
4130                        attrs: vec![],
4131                        by_ref: None,
4132                        mutability: None,
4133                        ident,
4134                        subpat: None,
4135                    });
4136                }
4137            }
4138        }
4139
4140        // Both wildcards should be renamed
4141        assert_eq!(wild_counter, 2);
4142
4143        let names: Vec<String> = dispatcher_inputs
4144            .iter()
4145            .filter_map(|arg| {
4146                if let FnArg::Typed(PatType { pat, .. }) = arg {
4147                    if let syn::Pat::Ident(pi) = pat.as_ref() {
4148                        return Some(pi.ident.to_string());
4149                    }
4150                }
4151                None
4152            })
4153            .collect();
4154
4155        assert_eq!(names, vec!["__autoversion_wild_0", "__autoversion_wild_1"]);
4156    }
4157
4158    // =========================================================================
4159    // autoversion — suffix_path (reused in dispatch)
4160    // =========================================================================
4161
4162    #[test]
4163    fn suffix_path_simple() {
4164        let path: syn::Path = syn::parse_str("process").unwrap();
4165        let suffixed = suffix_path(&path, "v3");
4166        assert_eq!(suffixed.to_token_stream().to_string(), "process_v3");
4167    }
4168
4169    #[test]
4170    fn suffix_path_qualified() {
4171        let path: syn::Path = syn::parse_str("module::process").unwrap();
4172        let suffixed = suffix_path(&path, "neon");
4173        let s = suffixed.to_token_stream().to_string();
4174        assert!(
4175            s.contains("process_neon"),
4176            "Expected process_neon, got: {}",
4177            s
4178        );
4179    }
4180}