// archmage_macros — crate root (lib.rs)
1//! Proc-macros for archmage SIMD capability tokens.
2//!
//! Provides the `#[arcane]` attribute (alongside the related `#[rite]` attribute) to make raw intrinsics
4//! safe via token proof.
5
6use proc_macro::TokenStream;
7use quote::{ToTokens, format_ident, quote};
8use syn::{
9    Attribute, FnArg, GenericParam, Ident, PatType, Signature, Token, Type, TypeParamBound,
10    parse::{Parse, ParseStream},
11    parse_macro_input, parse_quote, token,
12};
13
/// A function parsed with the body left as an opaque TokenStream.
///
/// Only the signature is fully parsed into an AST — the body tokens are collected
/// without building any AST nodes (no expressions, statements, or patterns parsed).
/// This saves ~2ms per function invocation at 100 lines of code.
#[derive(Clone)]
struct LightFn {
    /// Outer attributes on the function (doc comments, `#[inline]`, etc.).
    attrs: Vec<Attribute>,
    /// Visibility qualifier (`pub`, `pub(crate)`, or inherited).
    vis: syn::Visibility,
    /// Fully parsed signature: name, generics, inputs, output.
    sig: Signature,
    /// The brace pair delimiting the body; kept so spans survive re-emission.
    brace_token: token::Brace,
    /// Raw body tokens — intentionally never parsed into an AST.
    body: proc_macro2::TokenStream,
}
27
28impl Parse for LightFn {
29    fn parse(input: ParseStream) -> syn::Result<Self> {
30        let attrs = input.call(Attribute::parse_outer)?;
31        let vis: syn::Visibility = input.parse()?;
32        let sig: Signature = input.parse()?;
33        let content;
34        let brace_token = syn::braced!(content in input);
35        let body: proc_macro2::TokenStream = content.parse()?;
36        Ok(LightFn {
37            attrs,
38            vis,
39            sig,
40            brace_token,
41            body,
42        })
43    }
44}
45
46impl ToTokens for LightFn {
47    fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
48        for attr in &self.attrs {
49            attr.to_tokens(tokens);
50        }
51        self.vis.to_tokens(tokens);
52        self.sig.to_tokens(tokens);
53        self.brace_token.surround(tokens, |tokens| {
54            self.body.to_tokens(tokens);
55        });
56    }
57}
58
59/// Build a turbofish token stream from a function's generics.
60///
61/// Collects type and const generic parameters (skipping lifetimes) and returns
62/// a `::<A, B, N, M>` turbofish fragment. Returns empty tokens if there are no
63/// type/const generics to forward.
64///
65/// This is needed when the dispatcher or wrapper calls variant/sibling functions
66/// that have const generics not inferable from argument types alone.
67fn build_turbofish(generics: &syn::Generics) -> proc_macro2::TokenStream {
68    let params: Vec<proc_macro2::TokenStream> = generics
69        .params
70        .iter()
71        .filter_map(|param| match param {
72            GenericParam::Type(tp) => {
73                let ident = &tp.ident;
74                Some(quote! { #ident })
75            }
76            GenericParam::Const(cp) => {
77                let ident = &cp.ident;
78                Some(quote! { #ident })
79            }
80            GenericParam::Lifetime(_) => None,
81        })
82        .collect();
83    if params.is_empty() {
84        quote! {}
85    } else {
86        quote! { ::<#(#params),*> }
87    }
88}
89
90/// Replace all `Self` identifier tokens with a concrete type in a token stream.
91///
92/// Recurses into groups (braces, parens, brackets). Used for `#[arcane(_self = Type)]`
93/// to replace `Self` in both the return type and body without needing to parse the body.
94fn replace_self_in_tokens(
95    tokens: proc_macro2::TokenStream,
96    replacement: &Type,
97) -> proc_macro2::TokenStream {
98    let mut result = proc_macro2::TokenStream::new();
99    for tt in tokens {
100        match tt {
101            proc_macro2::TokenTree::Ident(ref ident) if ident == "Self" => {
102                result.extend(replacement.to_token_stream());
103            }
104            proc_macro2::TokenTree::Group(group) => {
105                let new_stream = replace_self_in_tokens(group.stream(), replacement);
106                let mut new_group = proc_macro2::Group::new(group.delimiter(), new_stream);
107                new_group.set_span(group.span());
108                result.extend(std::iter::once(proc_macro2::TokenTree::Group(new_group)));
109            }
110            other => {
111                result.extend(std::iter::once(other));
112            }
113        }
114    }
115    result
116}
117
/// Arguments to the `#[arcane]` macro.
#[derive(Default)]
struct ArcaneArgs {
    /// Use `#[inline(always)]` instead of `#[inline]` for the inner function.
    /// Requires nightly Rust with `#![feature(target_feature_inline_always)]`.
    inline_always: bool,
    /// The concrete type to use for `self` receiver.
    /// When specified, `self`/`&self`/`&mut self` is transformed to `_self: Type`/`&Type`/`&mut Type`.
    /// Implies `nested = true`.
    self_type: Option<Type>,
    /// Generate an `unreachable!()` stub on the wrong architecture.
    /// Default is false (cfg-out: no function emitted on wrong arch).
    stub: bool,
    /// Use nested inner function instead of sibling function.
    /// Implied by `_self = Type`. Required for associated functions in impl blocks
    /// that have no `self` receiver (the macro can't distinguish them from free functions).
    nested: bool,
    /// Inject `use archmage::intrinsics::{arch}::*;` (includes safe memory ops).
    import_intrinsics: bool,
    /// Inject `use magetypes::simd::{ns}::*;` and `use magetypes::simd::backends::*;`.
    /// NOTE(review): an earlier version of this comment also listed
    /// `magetypes::simd::generic::*`, but `generate_imports` in this file emits
    /// only the namespace and backends globs — confirm which is intended.
    import_magetypes: bool,
}
141
142impl Parse for ArcaneArgs {
143    fn parse(input: ParseStream) -> syn::Result<Self> {
144        let mut args = ArcaneArgs::default();
145
146        while !input.is_empty() {
147            let ident: Ident = input.parse()?;
148            match ident.to_string().as_str() {
149                "inline_always" => args.inline_always = true,
150                "stub" => args.stub = true,
151                "nested" => args.nested = true,
152                "import_intrinsics" => args.import_intrinsics = true,
153                "import_magetypes" => args.import_magetypes = true,
154                "_self" => {
155                    let _: Token![=] = input.parse()?;
156                    args.self_type = Some(input.parse()?);
157                }
158                other => {
159                    return Err(syn::Error::new(
160                        ident.span(),
161                        format!("unknown arcane argument: `{}`", other),
162                    ));
163                }
164            }
165            // Consume optional comma
166            if input.peek(Token![,]) {
167                let _: Token![,] = input.parse()?;
168            }
169        }
170
171        // _self = Type implies nested (inner fn needed for Self replacement)
172        if args.self_type.is_some() {
173            args.nested = true;
174        }
175
176        Ok(args)
177    }
178}
179
180// Token-to-features and trait-to-features mappings are generated from
181// token-registry.toml by xtask. Regenerate with: cargo run -p xtask -- generate
182mod generated;
183use generated::{
184    token_to_arch, token_to_features, token_to_magetypes_namespace, trait_to_arch,
185    trait_to_features, trait_to_magetypes_namespace,
186};
187
/// Result of extracting token info from a type.
///
/// Produced by `extract_token_type_info`; distinguishes the three parameter
/// shapes accepted for a capability token.
enum TokenTypeInfo {
    /// Concrete token type (e.g., `Avx2Token`)
    Concrete(String),
    /// impl Trait with the trait names (e.g., `impl HasX64V2`)
    ImplTrait(Vec<String>),
    /// Generic type parameter name (e.g., `T`); bounds resolved separately
    /// via `find_generic_bounds`.
    Generic(String),
}
197
198/// Extract token type information from a type.
199fn extract_token_type_info(ty: &Type) -> Option<TokenTypeInfo> {
200    match ty {
201        Type::Path(type_path) => {
202            // Get the last segment of the path (e.g., "Avx2Token" from "archmage::Avx2Token")
203            type_path.path.segments.last().map(|seg| {
204                let name = seg.ident.to_string();
205                // Check if it's a known concrete token type
206                if token_to_features(&name).is_some() {
207                    TokenTypeInfo::Concrete(name)
208                } else {
209                    // Might be a generic type parameter like `T`
210                    TokenTypeInfo::Generic(name)
211                }
212            })
213        }
214        Type::Reference(type_ref) => {
215            // Handle &Token or &mut Token
216            extract_token_type_info(&type_ref.elem)
217        }
218        Type::ImplTrait(impl_trait) => {
219            // Handle `impl HasX64V2` or `impl HasX64V2 + HasNeon`
220            let traits: Vec<String> = extract_trait_names_from_bounds(&impl_trait.bounds);
221            if traits.is_empty() {
222                None
223            } else {
224                Some(TokenTypeInfo::ImplTrait(traits))
225            }
226        }
227        _ => None,
228    }
229}
230
231/// Extract trait names from type param bounds.
232fn extract_trait_names_from_bounds(
233    bounds: &syn::punctuated::Punctuated<TypeParamBound, Token![+]>,
234) -> Vec<String> {
235    bounds
236        .iter()
237        .filter_map(|bound| {
238            if let TypeParamBound::Trait(trait_bound) = bound {
239                trait_bound
240                    .path
241                    .segments
242                    .last()
243                    .map(|seg| seg.ident.to_string())
244            } else {
245                None
246            }
247        })
248        .collect()
249}
250
251/// Look up a generic type parameter in the function's generics.
252fn find_generic_bounds(sig: &Signature, type_name: &str) -> Option<Vec<String>> {
253    // Check inline bounds first (e.g., `fn foo<T: HasX64V2>(token: T)`)
254    for param in &sig.generics.params {
255        if let GenericParam::Type(type_param) = param
256            && type_param.ident == type_name
257        {
258            let traits = extract_trait_names_from_bounds(&type_param.bounds);
259            if !traits.is_empty() {
260                return Some(traits);
261            }
262        }
263    }
264
265    // Check where clause (e.g., `fn foo<T>(token: T) where T: HasX64V2`)
266    if let Some(where_clause) = &sig.generics.where_clause {
267        for predicate in &where_clause.predicates {
268            if let syn::WherePredicate::Type(pred_type) = predicate
269                && let Type::Path(type_path) = &pred_type.bounded_ty
270                && let Some(seg) = type_path.path.segments.last()
271                && seg.ident == type_name
272            {
273                let traits = extract_trait_names_from_bounds(&pred_type.bounds);
274                if !traits.is_empty() {
275                    return Some(traits);
276                }
277            }
278        }
279    }
280
281    None
282}
283
284/// Convert trait names to features, collecting all features from all traits.
285fn traits_to_features(trait_names: &[String]) -> Option<Vec<&'static str>> {
286    let mut all_features = Vec::new();
287
288    for trait_name in trait_names {
289        if let Some(features) = trait_to_features(trait_name) {
290            for &feature in features {
291                if !all_features.contains(&feature) {
292                    all_features.push(feature);
293                }
294            }
295        }
296    }
297
298    if all_features.is_empty() {
299        None
300    } else {
301        Some(all_features)
302    }
303}
304
/// Trait names that don't map to any CPU features. These are valid in the type
/// system but cannot be used as token bounds in `#[arcane]`/`#[rite]` because
/// the macros need concrete features to generate `#[target_feature]` attributes.
/// Used by `find_featureless_trait` to produce a targeted diagnostic.
const FEATURELESS_TRAIT_NAMES: &[&str] = &["SimdToken", "IntoConcreteToken"];
309
310/// Check if any trait names are featureless (no CPU feature mapping).
311/// Returns the first featureless trait name found.
312fn find_featureless_trait(trait_names: &[String]) -> Option<&'static str> {
313    for name in trait_names {
314        for &featureless in FEATURELESS_TRAIT_NAMES {
315            if name == featureless {
316                return Some(featureless);
317            }
318        }
319    }
320    None
321}
322
323/// Diagnose why `find_token_param` failed. Returns the name of a featureless
324/// trait if the signature has a parameter bounded by one (e.g., `SimdToken`).
325fn diagnose_featureless_token(sig: &Signature) -> Option<&'static str> {
326    for arg in &sig.inputs {
327        if let FnArg::Typed(PatType { ty, .. }) = arg
328            && let Some(info) = extract_token_type_info(ty)
329        {
330            match &info {
331                TokenTypeInfo::ImplTrait(names) => {
332                    if let Some(name) = find_featureless_trait(names) {
333                        return Some(name);
334                    }
335                }
336                TokenTypeInfo::Generic(type_name) => {
337                    // Check if the type name itself is a featureless trait
338                    // (e.g., `token: SimdToken` used as a bare path)
339                    let as_vec = vec![type_name.clone()];
340                    if let Some(name) = find_featureless_trait(&as_vec) {
341                        return Some(name);
342                    }
343                    // Check generic bounds (e.g., `T: SimdToken`)
344                    if let Some(bounds) = find_generic_bounds(sig, type_name)
345                        && let Some(name) = find_featureless_trait(&bounds)
346                    {
347                        return Some(name);
348                    }
349                }
350                TokenTypeInfo::Concrete(_) => {}
351            }
352        }
353    }
354    None
355}
356
/// Result of finding a token parameter in a function signature.
struct TokenParamInfo {
    /// The parameter identifier (e.g., `token`). May be synthesized
    /// (`__archmage_token`) when the parameter was a wildcard `_`.
    ident: Ident,
    /// Target features to enable (e.g., `["avx2", "fma"]`)
    features: Vec<&'static str>,
    /// Target architecture (Some for concrete tokens, None for traits/generics)
    target_arch: Option<&'static str>,
    /// Concrete token type name (Some for concrete tokens, None for traits/generics)
    token_type_name: Option<String>,
    /// Magetypes width namespace (e.g., "v3", "neon", "wasm128")
    magetypes_namespace: Option<&'static str>,
}
370
371/// Resolve magetypes namespace from a list of trait names.
372/// Returns the first matching namespace found.
373fn traits_to_magetypes_namespace(trait_names: &[String]) -> Option<&'static str> {
374    for name in trait_names {
375        if let Some(ns) = trait_to_magetypes_namespace(name) {
376            return Some(ns);
377        }
378    }
379    None
380}
381
382/// Given trait bound names, return the first matching target architecture.
383fn traits_to_arch(trait_names: &[String]) -> Option<&'static str> {
384    for name in trait_names {
385        if let Some(arch) = trait_to_arch(name) {
386            return Some(arch);
387        }
388    }
389    None
390}
391
/// Find the first token parameter in a function signature.
///
/// Scans typed parameters left to right; the first one whose type resolves to a
/// non-empty feature set (concrete token, `impl Trait`, or bounded generic)
/// wins. Returns `None` when no parameter yields features — callers then run
/// `diagnose_featureless_token` for a better error message.
fn find_token_param(sig: &Signature) -> Option<TokenParamInfo> {
    for arg in &sig.inputs {
        match arg {
            FnArg::Receiver(_) => {
                // Self receivers (self, &self, &mut self) are not yet supported.
                // The macro creates an inner function, and Rust's inner functions
                // cannot have `self` parameters. Supporting this would require
                // AST rewriting to replace `self` with a regular parameter.
                // See the module docs for the workaround.
                continue;
            }
            FnArg::Typed(PatType { pat, ty, .. }) => {
                if let Some(info) = extract_token_type_info(ty) {
                    // Resolve (features, arch, concrete name, magetypes ns)
                    // according to which of the three shapes the type has.
                    let (features, arch, token_name, mage_ns) = match info {
                        TokenTypeInfo::Concrete(ref name) => {
                            let features = token_to_features(name).map(|f| f.to_vec());
                            let arch = token_to_arch(name);
                            let ns = token_to_magetypes_namespace(name);
                            (features, arch, Some(name.clone()), ns)
                        }
                        TokenTypeInfo::ImplTrait(ref trait_names) => {
                            let ns = traits_to_magetypes_namespace(trait_names);
                            let arch = traits_to_arch(trait_names);
                            (traits_to_features(trait_names), arch, None, ns)
                        }
                        TokenTypeInfo::Generic(type_name) => {
                            // Look up the generic parameter's bounds
                            let bounds = find_generic_bounds(sig, &type_name);
                            let features = bounds.as_ref().and_then(|t| traits_to_features(t));
                            let ns = bounds
                                .as_ref()
                                .and_then(|t| traits_to_magetypes_namespace(t));
                            let arch = bounds.as_ref().and_then(|t| traits_to_arch(t));
                            (features, arch, None, ns)
                        }
                    };

                    // Only a parameter that resolved to features counts as the token.
                    if let Some(features) = features {
                        // Extract parameter name (or synthesize one for wildcard `_`)
                        let ident = match pat.as_ref() {
                            syn::Pat::Ident(pat_ident) => Some(pat_ident.ident.clone()),
                            syn::Pat::Wild(w) => {
                                Some(Ident::new("__archmage_token", w.underscore_token.span))
                            }
                            // Destructuring patterns: no single name to forward; skip.
                            _ => None,
                        };
                        if let Some(ident) = ident {
                            return Some(TokenParamInfo {
                                ident,
                                features,
                                target_arch: arch,
                                token_type_name: token_name,
                                magetypes_namespace: mage_ns,
                            });
                        }
                    }
                }
            }
        }
    }
    None
}
455
/// Represents the kind of self receiver and the transformed parameter.
///
/// NOTE(review): no constructor of these variants is visible in this chunk —
/// presumably consumed by the nested/`_self = Type` expansion defined further
/// down the file; confirm against `arcane_impl_nested`.
enum SelfReceiver {
    /// `self` (by value/move)
    Owned,
    /// `&self` (shared reference)
    Ref,
    /// `&mut self` (mutable reference)
    RefMut,
}
465
466/// Generate import statements to prepend to a function body.
467///
468/// Returns a `TokenStream` of `use` statements based on the import flags,
469/// target architecture, and magetypes namespace.
470fn generate_imports(
471    target_arch: Option<&str>,
472    magetypes_namespace: Option<&str>,
473    import_intrinsics: bool,
474    import_magetypes: bool,
475) -> proc_macro2::TokenStream {
476    let mut imports = proc_macro2::TokenStream::new();
477
478    if import_intrinsics && let Some(arch) = target_arch {
479        let arch_ident = format_ident!("{}", arch);
480        imports.extend(quote! {
481            #[allow(unused_imports)]
482            use archmage::intrinsics::#arch_ident::*;
483        });
484        // ScalarToken or unknown arch: import_intrinsics is a no-op
485    }
486
487    if import_magetypes && let Some(ns) = magetypes_namespace {
488        let ns_ident = format_ident!("{}", ns);
489        imports.extend(quote! {
490            #[allow(unused_imports)]
491            use magetypes::simd::#ns_ident::*;
492            #[allow(unused_imports)]
493            use magetypes::simd::backends::*;
494        });
495    }
496
497    imports
498}
499
/// Shared implementation for the arcane-style attribute macros.
///
/// Pipeline: validate the receiver/args combination, locate the token parameter
/// (which supplies the `#[target_feature]` list, target arch, and magetypes
/// namespace), optionally prepend `use` imports to the body, then dispatch to
/// the wasm-safe, nested, or sibling expansion strategy.
fn arcane_impl(mut input_fn: LightFn, macro_name: &str, args: ArcaneArgs) -> TokenStream {
    // Check for self receiver
    let has_self_receiver = input_fn
        .sig
        .inputs
        .first()
        .map(|arg| matches!(arg, FnArg::Receiver(_)))
        .unwrap_or(false);

    // Nested mode is required when _self = Type is used (for Self replacement in nested fn).
    // In sibling mode, self/Self work naturally since both fns live in the same impl scope.
    // However, if there's a self receiver in nested mode, we still need _self = Type.
    if has_self_receiver && args.nested && args.self_type.is_none() {
        let msg = format!(
            "{} with self receiver in nested mode requires `_self = Type` argument.\n\
             Example: #[{}(nested, _self = MyType)]\n\
             Use `_self` (not `self`) in the function body to refer to self.\n\
             \n\
             Alternatively, remove `nested` to use sibling expansion (default), \
             which handles self/Self naturally.",
            macro_name, macro_name
        );
        return syn::Error::new_spanned(&input_fn.sig, msg)
            .to_compile_error()
            .into();
    }

    // Find the token parameter, its features, target arch, and token type name
    let TokenParamInfo {
        ident: _token_ident,
        features,
        target_arch,
        token_type_name,
        magetypes_namespace,
    } = match find_token_param(&input_fn.sig) {
        Some(result) => result,
        None => {
            // Check for specific misuse: featureless traits like SimdToken
            if let Some(trait_name) = diagnose_featureless_token(&input_fn.sig) {
                let msg = format!(
                    "`{trait_name}` cannot be used as a token bound in #[{macro_name}] \
                     because it doesn't specify any CPU features.\n\
                     \n\
                     #[{macro_name}] needs concrete features to generate #[target_feature]. \
                     Use a concrete token or a feature trait:\n\
                     \n\
                     Concrete tokens: X64V3Token, Desktop64, NeonToken, Arm64V2Token, ...\n\
                     Feature traits:  impl HasX64V2, impl HasNeon, impl HasArm64V3, ..."
                );
                return syn::Error::new_spanned(&input_fn.sig, msg)
                    .to_compile_error()
                    .into();
            }
            // Generic fallback error listing all supported token forms.
            let msg = format!(
                "{} requires a token parameter. Supported forms:\n\
                 - Concrete: `token: X64V3Token`\n\
                 - impl Trait: `token: impl HasX64V2`\n\
                 - Generic: `fn foo<T: HasX64V2>(token: T, ...)`\n\
                 - With self: `#[{}(_self = Type)] fn method(&self, token: impl HasNeon, ...)`",
                macro_name, macro_name
            );
            return syn::Error::new_spanned(&input_fn.sig, msg)
                .to_compile_error()
                .into();
        }
    };

    // Prepend import statements to body if requested
    let body_imports = generate_imports(
        target_arch,
        magetypes_namespace,
        args.import_intrinsics,
        args.import_magetypes,
    );
    if !body_imports.is_empty() {
        let original_body = &input_fn.body;
        input_fn.body = quote! {
            #body_imports
            #original_body
        };
    }

    // Build target_feature attributes
    let target_feature_attrs: Vec<Attribute> = features
        .iter()
        .map(|feature| parse_quote!(#[target_feature(enable = #feature)]))
        .collect();

    // Rename wildcard patterns (`_: Type`) to named params so the inner/sibling call works
    let mut wild_rename_counter = 0u32;
    for arg in &mut input_fn.sig.inputs {
        if let FnArg::Typed(pat_type) = arg
            && matches!(pat_type.pat.as_ref(), syn::Pat::Wild(_))
        {
            let ident = format_ident!("__archmage_wild_{}", wild_rename_counter);
            wild_rename_counter += 1;
            *pat_type.pat = syn::Pat::Ident(syn::PatIdent {
                attrs: vec![],
                by_ref: None,
                mutability: None,
                ident,
                subpat: None,
            });
        }
    }

    // Choose inline attribute based on args
    let inline_attr: Attribute = if args.inline_always {
        parse_quote!(#[inline(always)])
    } else {
        parse_quote!(#[inline])
    };

    // On wasm32, #[target_feature(enable = "simd128")] functions are safe (Rust 1.54+).
    // The wasm validation model guarantees unsupported instructions trap deterministically,
    // so there's no UB from feature mismatch. Skip the unsafe wrapper entirely.
    if target_arch == Some("wasm32") {
        return arcane_impl_wasm_safe(
            input_fn,
            &args,
            token_type_name,
            target_feature_attrs,
            inline_attr,
        );
    }

    // Non-wasm: nested (inner fn) vs sibling (default) expansion.
    if args.nested {
        arcane_impl_nested(
            input_fn,
            &args,
            target_arch,
            token_type_name,
            target_feature_attrs,
            inline_attr,
        )
    } else {
        arcane_impl_sibling(
            input_fn,
            &args,
            target_arch,
            token_type_name,
            target_feature_attrs,
            inline_attr,
        )
    }
}
647
/// WASM-safe expansion: emits rite-style output (no unsafe wrapper).
///
/// On wasm32, `#[target_feature(enable = "simd128")]` is safe — the wasm validation
/// model traps deterministically on unsupported instructions, so there's no UB.
/// We emit the function directly with `#[target_feature]` + `#[inline]`, like `#[rite]`.
///
/// If `_self = Type` is set, we inject `let _self = self;` at the top of the body
/// (the function stays in impl scope, so `Self` resolves naturally — no replacement needed).
fn arcane_impl_wasm_safe(
    input_fn: LightFn,
    args: &ArcaneArgs,
    token_type_name: Option<String>,
    target_feature_attrs: Vec<Attribute>,
    inline_attr: Attribute,
) -> TokenStream {
    let vis = &input_fn.vis;
    let sig = &input_fn.sig;
    let fn_name = &sig.ident;
    let attrs = &input_fn.attrs;

    // Token name used only inside the stub's panic message.
    let token_type_str = token_type_name.as_deref().unwrap_or("UnknownToken");

    // If _self = Type is set, inject `let _self = self;` at top of body so user code
    // referencing `_self` works. The function remains in impl scope, so `Self` resolves
    // naturally — no Self replacement needed (unlike nested mode's inner fn).
    let body = if args.self_type.is_some() {
        let original_body = &input_fn.body;
        quote! {
            let _self = self;
            #original_body
        }
    } else {
        input_fn.body.clone()
    };

    // Prepend target_feature + inline attrs, then the user's original attributes.
    let mut new_attrs = target_feature_attrs;
    new_attrs.push(inline_attr);
    for attr in attrs {
        new_attrs.push(attr.clone());
    }

    let stub = if args.stub {
        // Build stub args for suppressing unused-variable warnings
        let stub_args: Vec<proc_macro2::TokenStream> = sig
            .inputs
            .iter()
            .filter_map(|arg| match arg {
                FnArg::Typed(pat_type) => {
                    if let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref() {
                        let ident = &pat_ident.ident;
                        Some(quote!(#ident))
                    } else {
                        None
                    }
                }
                FnArg::Receiver(_) => None,
            })
            .collect();

        // Stub body: emitted only on non-wasm32 targets; panics if ever reached.
        quote! {
            #[cfg(not(target_arch = "wasm32"))]
            #vis #sig {
                let _ = (#(#stub_args),*);
                unreachable!(
                    "BUG: {}() was called but requires {} (target_arch = \"wasm32\"). \
                     {}::summon() returns None on this architecture, so this function \
                     is unreachable in safe code. If you used forge_token_dangerously(), \
                     that is the bug.",
                    stringify!(#fn_name),
                    #token_type_str,
                    #token_type_str,
                )
            }
        }
    } else {
        quote! {}
    };

    // Real function gated to wasm32; optional stub covers other targets.
    let expanded = quote! {
        #[cfg(target_arch = "wasm32")]
        #(#new_attrs)*
        #vis #sig {
            #body
        }

        #stub
    };

    expanded.into()
}
739
740/// Sibling expansion (default): generates two functions at the same scope level.
741///
742/// ```ignore
743/// // #[arcane] fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { body }
744/// // expands to:
745/// #[cfg(target_arch = "x86_64")]
746/// #[doc(hidden)]
747/// #[target_feature(enable = "avx2,fma,...")]
748/// #[inline]
749/// fn __arcane_process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { body }
750///
751/// #[cfg(target_arch = "x86_64")]
752/// fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] {
753///     unsafe { __arcane_process(token, data) }
754/// }
755/// ```
756///
757/// The sibling function is safe (Rust 2024 edition allows safe `#[target_feature]`
758/// functions). Only the call from the wrapper needs `unsafe` because the wrapper
759/// lacks matching target features. Compatible with `#![forbid(unsafe_code)]`.
760///
761/// Self/self work naturally since both functions live in the same impl scope.
762fn arcane_impl_sibling(
763    input_fn: LightFn,
764    args: &ArcaneArgs,
765    target_arch: Option<&str>,
766    token_type_name: Option<String>,
767    target_feature_attrs: Vec<Attribute>,
768    inline_attr: Attribute,
769) -> TokenStream {
770    let vis = &input_fn.vis;
771    let sig = &input_fn.sig;
772    let fn_name = &sig.ident;
773    let generics = &sig.generics;
774    let where_clause = &generics.where_clause;
775    let inputs = &sig.inputs;
776    let output = &sig.output;
777    let body = &input_fn.body;
778    let attrs = &input_fn.attrs;
779
780    let sibling_name = format_ident!("__arcane_{}", fn_name);
781
782    // Detect self receiver
783    let has_self_receiver = inputs
784        .first()
785        .map(|arg| matches!(arg, FnArg::Receiver(_)))
786        .unwrap_or(false);
787
788    // Build sibling signature: same as original but with sibling name, #[doc(hidden)]
789    // NOT unsafe — Rust 2024 edition allows safe #[target_feature] functions.
790    // Only the call from non-matching context (the wrapper) needs unsafe.
791    let sibling_sig_inputs = inputs;
792
793    // Build turbofish for forwarding type/const generic params to sibling
794    let turbofish = build_turbofish(generics);
795
796    // Build the call from wrapper to sibling
797    let sibling_call = if has_self_receiver {
798        // Method: self.__arcane_fn::<T, N>(other_args...)
799        let other_args: Vec<proc_macro2::TokenStream> = inputs
800            .iter()
801            .skip(1) // skip self receiver
802            .filter_map(|arg| {
803                if let FnArg::Typed(pat_type) = arg
804                    && let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref()
805                {
806                    let ident = &pat_ident.ident;
807                    Some(quote!(#ident))
808                } else {
809                    None
810                }
811            })
812            .collect();
813        quote! { self.#sibling_name #turbofish(#(#other_args),*) }
814    } else {
815        // Free function: __arcane_fn::<T, N>(all_args...)
816        let all_args: Vec<proc_macro2::TokenStream> = inputs
817            .iter()
818            .filter_map(|arg| {
819                if let FnArg::Typed(pat_type) = arg
820                    && let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref()
821                {
822                    let ident = &pat_ident.ident;
823                    Some(quote!(#ident))
824                } else {
825                    None
826                }
827            })
828            .collect();
829        quote! { #sibling_name #turbofish(#(#all_args),*) }
830    };
831
832    // Build stub args for suppressing unused warnings
833    let stub_args: Vec<proc_macro2::TokenStream> = inputs
834        .iter()
835        .filter_map(|arg| match arg {
836            FnArg::Typed(pat_type) => {
837                if let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref() {
838                    let ident = &pat_ident.ident;
839                    Some(quote!(#ident))
840                } else {
841                    None
842                }
843            }
844            FnArg::Receiver(_) => None, // self doesn't need _ = suppression
845        })
846        .collect();
847
848    let token_type_str = token_type_name.as_deref().unwrap_or("UnknownToken");
849
850    let expanded = if let Some(arch) = target_arch {
851        // Sibling function: #[doc(hidden)] #[target_feature] fn __arcane_fn(...)
852        // Always private — only the wrapper is user-visible.
853        // Safe declaration — Rust 2024 allows safe #[target_feature] functions.
854        let sibling_fn = quote! {
855            #[cfg(target_arch = #arch)]
856            #[doc(hidden)]
857            #(#target_feature_attrs)*
858            #inline_attr
859            fn #sibling_name #generics (#sibling_sig_inputs) #output #where_clause {
860                #body
861            }
862        };
863
864        // Wrapper function: fn original_name(...) { unsafe { sibling_call } }
865        // The unsafe block is needed because the sibling has #[target_feature] and
866        // the wrapper doesn't — calling across this boundary requires unsafe.
867        let wrapper_fn = quote! {
868            #[cfg(target_arch = #arch)]
869            #(#attrs)*
870            #vis #sig {
871                // SAFETY: The token parameter proves the required CPU features are available.
872                // Calling a #[target_feature] function from a non-matching context requires
873                // unsafe because the CPU may not support those instructions. The token's
874                // existence proves summon() succeeded, so the features are available.
875                unsafe { #sibling_call }
876            }
877        };
878
879        // Optional stub for other architectures
880        let stub = if args.stub {
881            quote! {
882                #[cfg(not(target_arch = #arch))]
883                #(#attrs)*
884                #vis #sig {
885                    let _ = (#(#stub_args),*);
886                    unreachable!(
887                        "BUG: {}() was called but requires {} (target_arch = \"{}\"). \
888                         {}::summon() returns None on this architecture, so this function \
889                         is unreachable in safe code. If you used forge_token_dangerously(), \
890                         that is the bug.",
891                        stringify!(#fn_name),
892                        #token_type_str,
893                        #arch,
894                        #token_type_str,
895                    )
896                }
897            }
898        } else {
899            quote! {}
900        };
901
902        quote! {
903            #sibling_fn
904            #wrapper_fn
905            #stub
906        }
907    } else {
908        // No specific arch (trait bounds or generic) - no cfg guards, no stub needed.
909        // Still use sibling pattern for consistency. Sibling is always private.
910        let sibling_fn = quote! {
911            #[doc(hidden)]
912            #(#target_feature_attrs)*
913            #inline_attr
914            fn #sibling_name #generics (#sibling_sig_inputs) #output #where_clause {
915                #body
916            }
917        };
918
919        let wrapper_fn = quote! {
920            #(#attrs)*
921            #vis #sig {
922                // SAFETY: The token proves the required CPU features are available.
923                unsafe { #sibling_call }
924            }
925        };
926
927        quote! {
928            #sibling_fn
929            #wrapper_fn
930        }
931    };
932
933    expanded.into()
934}
935
/// Nested inner function expansion (opt-in via `nested` or `_self = Type`).
///
/// This is the original approach: generates a nested inner function inside the
/// original function. Required when `_self = Type` is used because Self must be
/// replaced in the nested function (where it's not in scope).
///
/// Expansion shape: the wrapper keeps the original signature and contains a
/// nested `#[target_feature]` fn named `__simd_inner_<name>`; the wrapper
/// forwards its arguments (passing `self` positionally into the inner `_self`
/// parameter) via an `unsafe` call. When `target_arch` is known, the item is
/// cfg-gated and an optional `unreachable!()` stub is emitted (`args.stub`).
fn arcane_impl_nested(
    input_fn: LightFn,
    args: &ArcaneArgs,
    target_arch: Option<&str>,
    token_type_name: Option<String>,
    target_feature_attrs: Vec<Attribute>,
    inline_attr: Attribute,
) -> TokenStream {
    let vis = &input_fn.vis;
    let sig = &input_fn.sig;
    let fn_name = &sig.ident;
    let generics = &sig.generics;
    let where_clause = &generics.where_clause;
    let inputs = &sig.inputs;
    let output = &sig.output;
    let body = &input_fn.body;
    let attrs = &input_fn.attrs;

    // Determine self receiver type if present
    // `self`/`mut self` (no reference) => Owned; `&mut self` => RefMut;
    // `&self` => Ref. Reference-ness is checked before mutability so owned
    // `mut self` is classified as Owned.
    let self_receiver_kind: Option<SelfReceiver> = inputs.first().and_then(|arg| match arg {
        FnArg::Receiver(receiver) => {
            if receiver.reference.is_none() {
                Some(SelfReceiver::Owned)
            } else if receiver.mutability.is_some() {
                Some(SelfReceiver::RefMut)
            } else {
                Some(SelfReceiver::Ref)
            }
        }
        _ => None,
    });

    // Build inner function parameters, transforming self if needed.
    // Also replace Self in non-self parameter types when _self = Type is set,
    // since the inner function is a nested fn where Self from the impl is not in scope.
    let inner_params: Vec<proc_macro2::TokenStream> = inputs
        .iter()
        .map(|arg| match arg {
            FnArg::Receiver(_) => {
                // Transform self receiver to _self parameter
                // NOTE(review): the unwraps assume a receiver implies
                // `_self = Type` was supplied — presumably validated by the
                // caller before choosing nested expansion; confirm upstream.
                let self_ty = args.self_type.as_ref().unwrap();
                match self_receiver_kind.as_ref().unwrap() {
                    SelfReceiver::Owned => quote!(_self: #self_ty),
                    SelfReceiver::Ref => quote!(_self: &#self_ty),
                    SelfReceiver::RefMut => quote!(_self: &mut #self_ty),
                }
            }
            FnArg::Typed(pat_type) => {
                if let Some(ref self_ty) = args.self_type {
                    replace_self_in_tokens(quote!(#pat_type), self_ty)
                } else {
                    quote!(#pat_type)
                }
            }
        })
        .collect();

    // Build inner function call arguments
    // Non-identifier patterns (e.g. destructuring) yield None and are not
    // forwarded; only simple `name: Type` parameters are passed through.
    let inner_args: Vec<proc_macro2::TokenStream> = inputs
        .iter()
        .filter_map(|arg| match arg {
            FnArg::Typed(pat_type) => {
                if let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref() {
                    let ident = &pat_ident.ident;
                    Some(quote!(#ident))
                } else {
                    None
                }
            }
            FnArg::Receiver(_) => Some(quote!(self)), // Pass self to inner as _self
        })
        .collect();

    let inner_fn_name = format_ident!("__simd_inner_{}", fn_name);

    // Build turbofish for forwarding type/const generic params to inner function
    let turbofish = build_turbofish(generics);

    // Transform output, body, and where clause to replace Self with concrete type if needed.
    // The no-op branch still converts to token streams so both arms share a type.
    let (inner_output, inner_body, inner_where_clause): (
        proc_macro2::TokenStream,
        proc_macro2::TokenStream,
        proc_macro2::TokenStream,
    ) = if let Some(ref self_ty) = args.self_type {
        let transformed_output = replace_self_in_tokens(output.to_token_stream(), self_ty);
        let transformed_body = replace_self_in_tokens(body.clone(), self_ty);
        let transformed_where = where_clause
            .as_ref()
            .map(|wc| replace_self_in_tokens(wc.to_token_stream(), self_ty))
            .unwrap_or_default();
        (transformed_output, transformed_body, transformed_where)
    } else {
        (
            output.to_token_stream(),
            body.clone(),
            where_clause
                .as_ref()
                .map(|wc| wc.to_token_stream())
                .unwrap_or_default(),
        )
    };

    let token_type_str = token_type_name.as_deref().unwrap_or("UnknownToken");
    let expanded = if let Some(arch) = target_arch {
        let stub = if args.stub {
            quote! {
                // Stub for other architectures - the token cannot be obtained
                #[cfg(not(target_arch = #arch))]
                #(#attrs)*
                #vis #sig {
                    let _ = (#(#inner_args),*);
                    unreachable!(
                        "BUG: {}() was called but requires {} (target_arch = \"{}\"). \
                         {}::summon() returns None on this architecture, so this function \
                         is unreachable in safe code. If you used forge_token_dangerously(), \
                         that is the bug.",
                        stringify!(#fn_name),
                        #token_type_str,
                        #arch,
                        #token_type_str,
                    )
                }
            }
        } else {
            quote! {}
        };

        quote! {
            // Real implementation for the correct architecture
            #[cfg(target_arch = #arch)]
            #(#attrs)*
            #vis #sig {
                #(#target_feature_attrs)*
                #inline_attr
                fn #inner_fn_name #generics (#(#inner_params),*) #inner_output #inner_where_clause {
                    #inner_body
                }

                // SAFETY: The token parameter proves the required CPU features are available.
                unsafe { #inner_fn_name #turbofish(#(#inner_args),*) }
            }

            #stub
        }
    } else {
        // No specific arch (trait bounds or generic) - generate without cfg guards
        quote! {
            #(#attrs)*
            #vis #sig {
                #(#target_feature_attrs)*
                #inline_attr
                fn #inner_fn_name #generics (#(#inner_params),*) #inner_output #inner_where_clause {
                    #inner_body
                }

                // SAFETY: The token proves the required CPU features are available.
                unsafe { #inner_fn_name #turbofish(#(#inner_args),*) }
            }
        }
    };

    expanded.into()
}
1104
1105/// Mark a function as an arcane SIMD function.
1106///
1107/// This macro generates a safe wrapper around a `#[target_feature]` function.
1108/// The token parameter type determines which CPU features are enabled.
1109///
1110/// # Expansion Modes
1111///
1112/// ## Sibling (default)
1113///
1114/// Generates two functions at the same scope: a safe `#[target_feature]` sibling
1115/// and a safe wrapper. `self`/`Self` work naturally since both functions share scope.
1116/// Compatible with `#![forbid(unsafe_code)]`.
1117///
1118/// ```ignore
1119/// #[arcane]
1120/// fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { /* body */ }
1121/// // Expands to (x86_64 only):
1122/// #[cfg(target_arch = "x86_64")]
1123/// #[doc(hidden)]
1124/// #[target_feature(enable = "avx2,fma,...")]
1125/// fn __arcane_process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { /* body */ }
1126///
1127/// #[cfg(target_arch = "x86_64")]
1128/// fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] {
1129///     unsafe { __arcane_process(token, data) }
1130/// }
1131/// ```
1132///
1133/// Methods work naturally:
1134///
1135/// ```ignore
1136/// impl MyType {
1137///     #[arcane]
1138///     fn compute(&self, token: X64V3Token) -> f32 {
1139///         self.data.iter().sum()  // self/Self just work!
1140///     }
1141/// }
1142/// ```
1143///
1144/// ## Nested (`nested` or `_self = Type`)
1145///
1146/// Generates a nested inner function inside the original. Required for trait impls
1147/// (where sibling functions would fail) and when `_self = Type` is used.
1148///
1149/// ```ignore
1150/// impl SimdOps for MyType {
1151///     #[arcane(_self = MyType)]
1152///     fn compute(&self, token: X64V3Token) -> Self {
1153///         // Use _self instead of self, Self replaced with MyType
1154///         _self.data.iter().sum()
1155///     }
1156/// }
1157/// ```
1158///
1159/// # Cross-Architecture Behavior
1160///
1161/// **Default (cfg-out):** On the wrong architecture, the function is not emitted
1162/// at all — no stub, no dead code. Code that references it must be cfg-gated.
1163///
1164/// **With `stub`:** Generates an `unreachable!()` stub on wrong architectures.
1165/// Use when cross-arch dispatch references the function without cfg guards.
1166///
1167/// ```ignore
1168/// #[arcane(stub)]  // generates stub on wrong arch
1169/// fn process_neon(token: NeonToken, data: &[f32]) -> f32 { ... }
1170/// ```
1171///
1172/// `incant!` is unaffected — it already cfg-gates dispatch calls by architecture.
1173///
1174/// # Token Parameter Forms
1175///
1176/// ```ignore
1177/// // Concrete token
1178/// #[arcane]
1179/// fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { ... }
1180///
1181/// // impl Trait bound
1182/// #[arcane]
1183/// fn process(token: impl HasX64V2, data: &[f32; 8]) -> [f32; 8] { ... }
1184///
1185/// // Generic with inline or where-clause bounds
1186/// #[arcane]
1187/// fn process<T: HasX64V2>(token: T, data: &[f32; 8]) -> [f32; 8] { ... }
1188///
1189/// // Wildcard
1190/// #[arcane]
1191/// fn process(_: X64V3Token, data: &[f32; 8]) -> [f32; 8] { ... }
1192/// ```
1193///
1194/// # Options
1195///
1196/// | Option | Effect |
1197/// |--------|--------|
1198/// | `stub` | Generate `unreachable!()` stub on wrong architecture |
1199/// | `nested` | Use nested inner function instead of sibling |
1200/// | `_self = Type` | Implies `nested`, transforms self receiver, replaces Self |
1201/// | `inline_always` | Use `#[inline(always)]` (requires nightly) |
1202/// | `import_intrinsics` | Auto-import `archmage::intrinsics::{arch}::*` (includes safe memory ops) |
1203/// | `import_magetypes` | Auto-import `magetypes::simd::{ns}::*` and `magetypes::simd::backends::*` |
1204///
1205/// ## Auto-Imports
1206///
1207/// `import_intrinsics` and `import_magetypes` inject `use` statements into the
1208/// function body, eliminating boilerplate. The macro derives the architecture and
1209/// namespace from the token type:
1210///
1211/// ```ignore
1212/// // Without auto-imports — lots of boilerplate:
1213/// use std::arch::x86_64::*;
1214/// use magetypes::simd::v3::*;
1215///
1216/// #[arcane]
1217/// fn process(token: X64V3Token, data: &[f32; 8]) -> f32 {
1218///     let v = f32x8::load(token, data);
1219///     let zero = _mm256_setzero_ps();
1220///     // ...
1221/// }
1222///
1223/// // With auto-imports — clean:
1224/// #[arcane(import_intrinsics, import_magetypes)]
1225/// fn process(token: X64V3Token, data: &[f32; 8]) -> f32 {
1226///     let v = f32x8::load(token, data);
1227///     let zero = _mm256_setzero_ps();
1228///     // ...
1229/// }
1230/// ```
1231///
1232/// The namespace mapping is token-driven:
1233///
1234/// | Token | `import_intrinsics` | `import_magetypes` |
1235/// |-------|--------------------|--------------------|
1236/// | `X64V1..V3Token` | `archmage::intrinsics::x86_64::*` | `magetypes::simd::v3::*` |
1237/// | `X64V4Token` | `archmage::intrinsics::x86_64::*` | `magetypes::simd::v4::*` |
1238/// | `X64V4xToken` | `archmage::intrinsics::x86_64::*` | `magetypes::simd::v4x::*` |
1239/// | `NeonToken` / ARM | `archmage::intrinsics::aarch64::*` | `magetypes::simd::neon::*` |
1240/// | `Wasm128Token` | `archmage::intrinsics::wasm32::*` | `magetypes::simd::wasm128::*` |
1241///
1242/// Works with concrete tokens, `impl Trait` bounds, and generic parameters.
1243///
1244/// # Supported Tokens
1245///
1246/// - **x86_64**: `X64V2Token`, `X64V3Token`/`Desktop64`, `X64V4Token`/`Avx512Token`/`Server64`,
1247///   `X64V4xToken`, `Avx512Fp16Token`, `X64CryptoToken`, `X64V3CryptoToken`
1248/// - **ARM**: `NeonToken`/`Arm64`, `Arm64V2Token`, `Arm64V3Token`,
1249///   `NeonAesToken`, `NeonSha3Token`, `NeonCrcToken`
1250/// - **WASM**: `Wasm128Token`
1251///
1252/// # Supported Trait Bounds
1253///
1254/// `HasX64V2`, `HasX64V4`, `HasNeon`, `HasNeonAes`, `HasNeonSha3`, `HasArm64V2`, `HasArm64V3`
1255///
1256/// ```ignore
1257/// #![feature(target_feature_inline_always)]
1258///
1259/// #[arcane(inline_always)]
1260/// fn fast_kernel(token: Avx2Token, data: &mut [f32]) {
1261///     // Inner function will use #[inline(always)]
1262/// }
1263/// ```
1264#[proc_macro_attribute]
1265pub fn arcane(attr: TokenStream, item: TokenStream) -> TokenStream {
1266    let args = parse_macro_input!(attr as ArcaneArgs);
1267    let input_fn = parse_macro_input!(item as LightFn);
1268    arcane_impl(input_fn, "arcane", args)
1269}
1270
1271/// Legacy alias for [`arcane`].
1272///
1273/// **Deprecated:** Use `#[arcane]` instead. This alias exists only for migration.
1274#[proc_macro_attribute]
1275#[doc(hidden)]
1276pub fn simd_fn(attr: TokenStream, item: TokenStream) -> TokenStream {
1277    let args = parse_macro_input!(attr as ArcaneArgs);
1278    let input_fn = parse_macro_input!(item as LightFn);
1279    arcane_impl(input_fn, "simd_fn", args)
1280}
1281
1282/// Descriptive alias for [`arcane`].
1283///
1284/// Generates a safe wrapper around a `#[target_feature]` inner function.
1285/// The token type in your signature determines which CPU features are enabled.
1286/// Creates an LLVM optimization boundary — use [`token_target_features`]
1287/// (alias for [`rite`]) for inner helpers to avoid this.
1288///
1289/// Since Rust 1.85, value-based SIMD intrinsics are safe inside
1290/// `#[target_feature]` functions. This macro generates the `#[target_feature]`
1291/// wrapper so you never need to write `unsafe` for SIMD code.
1292///
1293/// See [`arcane`] for full documentation and examples.
1294#[proc_macro_attribute]
1295pub fn token_target_features_boundary(attr: TokenStream, item: TokenStream) -> TokenStream {
1296    let args = parse_macro_input!(attr as ArcaneArgs);
1297    let input_fn = parse_macro_input!(item as LightFn);
1298    arcane_impl(input_fn, "token_target_features_boundary", args)
1299}
1300
1301// ============================================================================
1302// Rite macro for inner SIMD functions (inlines into matching #[target_feature] callers)
1303// ============================================================================
1304
1305/// Annotate inner SIMD helpers called from `#[arcane]` functions.
1306///
1307/// Unlike `#[arcane]`, which creates an inner `#[target_feature]` function behind
1308/// a safe boundary, `#[rite]` adds `#[target_feature]` and `#[inline]` directly.
1309/// LLVM inlines it into any caller with matching features — no boundary crossing.
1310///
1311/// # When to Use
1312///
1313/// Use `#[rite]` for helper functions that are **only** called from within
1314/// `#[arcane]` functions with matching or superset token types:
1315///
1316/// ```ignore
1317/// use archmage::{arcane, rite, X64V3Token};
1318///
1319/// #[arcane]
1320/// fn outer(token: X64V3Token, data: &[f32; 8]) -> f32 {
1321///     // helper inlines — same target features, no boundary
1322///     helper(token, data) * 2.0
1323/// }
1324///
1325/// #[rite]
1326/// fn helper(token: X64V3Token, data: &[f32; 8]) -> f32 {
1327///     // Just has #[target_feature(enable = "avx2,fma,...")]
1328///     // Called from #[arcane] context, so features are guaranteed
1329///     let v = f32x8::from_array(token, *data);
1330///     v.reduce_add()
1331/// }
1332/// ```
1333///
1334/// # Safety
1335///
1336/// `#[rite]` functions can only be safely called from contexts where the
1337/// required CPU features are enabled:
1338/// - From within `#[arcane]` functions with matching/superset tokens
1339/// - From within other `#[rite]` functions with matching/superset tokens
1340/// - From code compiled with `-Ctarget-cpu` that enables the features
1341///
1342/// Calling from other contexts requires `unsafe` and the caller must ensure
1343/// the CPU supports the required features.
1344///
1345/// # Cross-Architecture Behavior
1346///
1347/// Like `#[arcane]`, defaults to cfg-out (no function on wrong arch).
1348/// Use `#[rite(stub)]` to generate an unreachable stub instead.
1349///
1350/// # Options
1351///
1352/// | Option | Effect |
1353/// |--------|--------|
1354/// | `stub` | Generate `unreachable!()` stub on wrong architecture |
1355/// | `import_intrinsics` | Auto-import `archmage::intrinsics::{arch}::*` (includes safe memory ops) |
1356/// | `import_magetypes` | Auto-import `magetypes::simd::{ns}::*` and `magetypes::simd::backends::*` |
1357///
1358/// See `#[arcane]` docs for the full namespace mapping table.
1359///
1360/// # Comparison with #[arcane]
1361///
1362/// | Aspect | `#[arcane]` | `#[rite]` |
1363/// |--------|-------------|-----------|
1364/// | Creates wrapper | Yes | No |
1365/// | Entry point | Yes | No |
1366/// | Inlines into caller | No (barrier) | Yes |
1367/// | Safe to call anywhere | Yes (with token) | Only from feature-enabled context |
1368/// | `stub` param | Yes | Yes |
1369/// | `import_intrinsics` | Yes | Yes |
1370/// | `import_magetypes` | Yes | Yes |
1371#[proc_macro_attribute]
1372pub fn rite(attr: TokenStream, item: TokenStream) -> TokenStream {
1373    let args = parse_macro_input!(attr as RiteArgs);
1374    let input_fn = parse_macro_input!(item as LightFn);
1375    rite_impl(input_fn, args)
1376}
1377
1378/// Descriptive alias for [`rite`].
1379///
1380/// Applies `#[target_feature]` + `#[inline]` based on the token type in your
1381/// function signature. No wrapper, no optimization boundary. Use for functions
1382/// called from within `#[arcane]`/`#[token_target_features_boundary]` code.
1383///
1384/// Since Rust 1.85, calling a `#[target_feature]` function from another function
1385/// with matching features is safe — no `unsafe` needed.
1386///
1387/// See [`rite`] for full documentation and examples.
1388#[proc_macro_attribute]
1389pub fn token_target_features(attr: TokenStream, item: TokenStream) -> TokenStream {
1390    let args = parse_macro_input!(attr as RiteArgs);
1391    let input_fn = parse_macro_input!(item as LightFn);
1392    rite_impl(input_fn, args)
1393}
1394
/// Arguments for the `#[rite]` macro.
///
/// Parsed from the attribute's argument list, e.g.
/// `#[rite(stub, import_intrinsics)]`. All flags default to off.
#[derive(Default)]
struct RiteArgs {
    /// Generate an `unreachable!()` stub on the wrong architecture.
    /// Default is false (cfg-out: no function emitted on wrong arch).
    stub: bool,
    /// Inject `use archmage::intrinsics::{arch}::*;` (includes safe memory ops).
    import_intrinsics: bool,
    /// Inject `use magetypes::simd::{ns}::*;`, `use magetypes::simd::generic::*;`,
    /// and `use magetypes::simd::backends::*;`.
    import_magetypes: bool,
}
1407
1408impl Parse for RiteArgs {
1409    fn parse(input: ParseStream) -> syn::Result<Self> {
1410        let mut args = RiteArgs::default();
1411
1412        while !input.is_empty() {
1413            let ident: Ident = input.parse()?;
1414            match ident.to_string().as_str() {
1415                "stub" => args.stub = true,
1416                "import_intrinsics" => args.import_intrinsics = true,
1417                "import_magetypes" => args.import_magetypes = true,
1418                other => {
1419                    return Err(syn::Error::new(
1420                        ident.span(),
1421                        format!(
1422                            "unknown rite argument: `{}`. Supported: `stub`, \
1423                             `import_intrinsics`, `import_magetypes`.",
1424                            other
1425                        ),
1426                    ));
1427                }
1428            }
1429            if input.peek(Token![,]) {
1430                let _: Token![,] = input.parse()?;
1431            }
1432        }
1433
1434        Ok(args)
1435    }
1436}
1437
1438/// Implementation for the `#[rite]` macro.
1439fn rite_impl(mut input_fn: LightFn, args: RiteArgs) -> TokenStream {
1440    // Find the token parameter and its features
1441    let TokenParamInfo {
1442        features,
1443        target_arch,
1444        magetypes_namespace,
1445        ..
1446    } = match find_token_param(&input_fn.sig) {
1447        Some(result) => result,
1448        None => {
1449            // Check for specific misuse: featureless traits like SimdToken
1450            if let Some(trait_name) = diagnose_featureless_token(&input_fn.sig) {
1451                let msg = format!(
1452                    "`{trait_name}` cannot be used as a token bound in #[rite] \
1453                     because it doesn't specify any CPU features.\n\
1454                     \n\
1455                     #[rite] needs concrete features to generate #[target_feature]. \
1456                     Use a concrete token or a feature trait:\n\
1457                     \n\
1458                     Concrete tokens: X64V3Token, Desktop64, NeonToken, Arm64V2Token, ...\n\
1459                     Feature traits:  impl HasX64V2, impl HasNeon, impl HasArm64V3, ..."
1460                );
1461                return syn::Error::new_spanned(&input_fn.sig, msg)
1462                    .to_compile_error()
1463                    .into();
1464            }
1465            let msg = "rite requires a token parameter. Supported forms:\n\
1466                 - Concrete: `token: X64V3Token`\n\
1467                 - impl Trait: `token: impl HasX64V2`\n\
1468                 - Generic: `fn foo<T: HasX64V2>(token: T, ...)`";
1469            return syn::Error::new_spanned(&input_fn.sig, msg)
1470                .to_compile_error()
1471                .into();
1472        }
1473    };
1474
1475    // Build target_feature attributes
1476    let target_feature_attrs: Vec<Attribute> = features
1477        .iter()
1478        .map(|feature| parse_quote!(#[target_feature(enable = #feature)]))
1479        .collect();
1480
1481    // Always use #[inline] - #[inline(always)] + #[target_feature] requires nightly
1482    let inline_attr: Attribute = parse_quote!(#[inline]);
1483
1484    // Prepend attributes to the function
1485    let mut new_attrs = target_feature_attrs;
1486    new_attrs.push(inline_attr);
1487    new_attrs.append(&mut input_fn.attrs);
1488    input_fn.attrs = new_attrs;
1489
1490    // Prepend import statements to body if requested
1491    let body_imports = generate_imports(
1492        target_arch,
1493        magetypes_namespace,
1494        args.import_intrinsics,
1495        args.import_magetypes,
1496    );
1497    if !body_imports.is_empty() {
1498        let original_body = &input_fn.body;
1499        input_fn.body = quote! {
1500            #body_imports
1501            #original_body
1502        };
1503    }
1504
1505    // If we know the target arch, generate cfg-gated impl (+ optional stub)
1506    if let Some(arch) = target_arch {
1507        let vis = &input_fn.vis;
1508        let sig = &input_fn.sig;
1509        let attrs = &input_fn.attrs;
1510        let body = &input_fn.body;
1511
1512        let stub = if args.stub {
1513            quote! {
1514                #[cfg(not(target_arch = #arch))]
1515                #vis #sig {
1516                    unreachable!(concat!(
1517                        "This function requires ",
1518                        #arch,
1519                        " architecture"
1520                    ))
1521                }
1522            }
1523        } else {
1524            quote! {}
1525        };
1526
1527        quote! {
1528            #[cfg(target_arch = #arch)]
1529            #(#attrs)*
1530            #vis #sig {
1531                #body
1532            }
1533
1534            #stub
1535        }
1536        .into()
1537    } else {
1538        // No specific arch (trait bounds) - just emit the annotated function
1539        quote!(#input_fn).into()
1540    }
1541}
1542
1543// =============================================================================
1544// magetypes! macro - generate platform variants from generic function
1545// =============================================================================
1546
1547/// Generate platform-specific variants from a function by replacing `Token`.
1548///
1549/// Use `Token` as a placeholder for the token type. The macro generates
1550/// suffixed variants with `Token` replaced by the concrete token type, and
1551/// each variant wrapped in the appropriate `#[cfg(target_arch = ...)]` guard.
1552///
1553/// # Default tiers
1554///
1555/// Without arguments, generates `_v3`, `_v4`, `_neon`, `_wasm128`, `_scalar`:
1556///
1557/// ```rust,ignore
1558/// #[magetypes]
1559/// fn process(token: Token, data: &[f32]) -> f32 {
1560///     inner_simd_work(token, data)
1561/// }
1562/// ```
1563///
1564/// # Explicit tiers
1565///
1566/// Specify which tiers to generate:
1567///
1568/// ```rust,ignore
1569/// #[magetypes(v1, v3, neon)]
1570/// fn process(token: Token, data: &[f32]) -> f32 {
1571///     inner_simd_work(token, data)
1572/// }
1573/// // Generates: process_v1, process_v3, process_neon, process_scalar
1574/// ```
1575///
1576/// `scalar` is always included implicitly.
1577///
1578/// Known tiers: `v1`, `v2`, `v3`, `v4`, `v4x`, `neon`, `neon_aes`,
1579/// `neon_sha3`, `neon_crc`, `wasm128`, `wasm128_relaxed`, `scalar`.
1580///
1581/// # What gets replaced
1582///
1583/// **Only `Token`** is replaced — with the concrete token type for each variant
1584/// (e.g., `archmage::X64V3Token`, `archmage::ScalarToken`). SIMD types like
1585/// `f32x8` and constants like `LANES` are **not** replaced by this macro.
1586///
1587/// # Usage with incant!
1588///
1589/// The generated variants work with `incant!` for dispatch:
1590///
1591/// ```rust,ignore
1592/// pub fn process_api(data: &[f32]) -> f32 {
1593///     incant!(process(data))
1594/// }
1595///
1596/// // Or with matching explicit tiers:
1597/// pub fn process_api(data: &[f32]) -> f32 {
1598///     incant!(process(data), [v1, v3, neon])
1599/// }
1600/// ```
1601#[proc_macro_attribute]
1602pub fn magetypes(attr: TokenStream, item: TokenStream) -> TokenStream {
1603    let input_fn = parse_macro_input!(item as LightFn);
1604
1605    // Parse optional tier list from attribute args
1606    let tier_names: Vec<String> = if attr.is_empty() {
1607        DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect()
1608    } else {
1609        let parser = |input: ParseStream| input.parse_terminated(Ident::parse, Token![,]);
1610        let idents = match syn::parse::Parser::parse(parser, attr) {
1611            Ok(p) => p,
1612            Err(e) => return e.to_compile_error().into(),
1613        };
1614        idents.iter().map(|i| i.to_string()).collect()
1615    };
1616
1617    let tiers = match resolve_tiers(&tier_names, input_fn.sig.ident.span()) {
1618        Ok(t) => t,
1619        Err(e) => return e.to_compile_error().into(),
1620    };
1621
1622    magetypes_impl(input_fn, &tiers)
1623}
1624
1625fn magetypes_impl(mut input_fn: LightFn, tiers: &[&TierDescriptor]) -> TokenStream {
1626    // Strip user-provided #[arcane] / #[rite] to prevent double-wrapping
1627    // (magetypes auto-adds #[arcane] on non-scalar variants)
1628    input_fn
1629        .attrs
1630        .retain(|attr| !attr.path().is_ident("arcane") && !attr.path().is_ident("rite"));
1631
1632    let fn_name = &input_fn.sig.ident;
1633    let fn_attrs = &input_fn.attrs;
1634
1635    // Convert function to string for text substitution
1636    let fn_str = input_fn.to_token_stream().to_string();
1637
1638    let mut variants = Vec::new();
1639
1640    for tier in tiers {
1641        // Create suffixed function name
1642        let suffixed_name = format!("{}_{}", fn_name, tier.suffix);
1643
1644        // Do text substitution
1645        let mut variant_str = fn_str.clone();
1646
1647        // Replace function name
1648        variant_str = variant_str.replacen(&fn_name.to_string(), &suffixed_name, 1);
1649
1650        // Replace Token type with concrete token
1651        variant_str = variant_str.replace("Token", tier.token_path);
1652
1653        // Parse back to tokens
1654        let variant_tokens: proc_macro2::TokenStream = match variant_str.parse() {
1655            Ok(t) => t,
1656            Err(e) => {
1657                return syn::Error::new_spanned(
1658                    &input_fn,
1659                    format!(
1660                        "Failed to parse generated variant `{}`: {}",
1661                        suffixed_name, e
1662                    ),
1663                )
1664                .to_compile_error()
1665                .into();
1666            }
1667        };
1668
1669        // Add cfg guards
1670        let cfg_guard = match (tier.target_arch, tier.cargo_feature) {
1671            (Some(arch), Some(feature)) => {
1672                quote! { #[cfg(all(target_arch = #arch, feature = #feature))] }
1673            }
1674            (Some(arch), None) => {
1675                quote! { #[cfg(target_arch = #arch)] }
1676            }
1677            (None, Some(feature)) => {
1678                quote! { #[cfg(feature = #feature)] }
1679            }
1680            (None, None) => {
1681                quote! {} // No guard needed (scalar)
1682            }
1683        };
1684
1685        variants.push(if tier.name != "scalar" {
1686            // Non-scalar variants get #[arcane] so target_feature is applied
1687            quote! {
1688                #cfg_guard
1689                #[archmage::arcane]
1690                #variant_tokens
1691            }
1692        } else {
1693            quote! {
1694                #cfg_guard
1695                #variant_tokens
1696            }
1697        });
1698    }
1699
1700    // Remove attributes from the list that should not be duplicated
1701    let filtered_attrs: Vec<_> = fn_attrs
1702        .iter()
1703        .filter(|a| !a.path().is_ident("magetypes"))
1704        .collect();
1705
1706    let output = quote! {
1707        #(#filtered_attrs)*
1708        #(#variants)*
1709    };
1710
1711    output.into()
1712}
1713
1714// =============================================================================
1715// incant! macro - dispatch to platform-specific variants
1716// =============================================================================
1717
1718// =============================================================================
1719// Tier descriptors for incant! and #[magetypes]
1720// =============================================================================
1721
/// Describes a dispatch tier for incant! and #[magetypes].
///
/// Instances live in the static [`ALL_TIERS`] table; tiers are looked up by
/// `name` and ordered for dispatch by `priority`.
struct TierDescriptor {
    /// Tier name as written in user code (e.g., "v3", "neon")
    name: &'static str,
    /// Function suffix appended to generated variant names (e.g., "v3", "neon", "scalar")
    suffix: &'static str,
    /// Fully-qualified token type path (e.g., "archmage::X64V3Token")
    token_path: &'static str,
    /// IntoConcreteToken method name (e.g., "as_x64v3") used by passthrough dispatch
    as_method: &'static str,
    /// Target architecture for cfg guard (None = no guard, e.g. scalar)
    target_arch: Option<&'static str>,
    /// Required cargo feature (None = no feature guard)
    cargo_feature: Option<&'static str>,
    /// Dispatch priority (higher = tried first within same arch)
    priority: u32,
}
1739
/// All known tiers in dispatch-priority order (highest first within arch).
///
/// Note: `resolve_tiers` re-sorts any selection by the `priority` field, so
/// dispatch order is driven by that field rather than by table position —
/// keep both consistent when adding tiers.
const ALL_TIERS: &[TierDescriptor] = &[
    // x86: highest to lowest
    TierDescriptor {
        name: "v4x",
        suffix: "v4x",
        token_path: "archmage::X64V4xToken",
        as_method: "as_x64v4x",
        target_arch: Some("x86_64"),
        cargo_feature: Some("avx512"),
        priority: 50,
    },
    TierDescriptor {
        name: "v4",
        suffix: "v4",
        token_path: "archmage::X64V4Token",
        as_method: "as_x64v4",
        target_arch: Some("x86_64"),
        cargo_feature: Some("avx512"),
        priority: 40,
    },
    TierDescriptor {
        name: "v3_crypto",
        suffix: "v3_crypto",
        token_path: "archmage::X64V3CryptoToken",
        as_method: "as_x64v3_crypto",
        target_arch: Some("x86_64"),
        cargo_feature: None,
        priority: 35,
    },
    TierDescriptor {
        name: "v3",
        suffix: "v3",
        token_path: "archmage::X64V3Token",
        as_method: "as_x64v3",
        target_arch: Some("x86_64"),
        cargo_feature: None,
        priority: 30,
    },
    TierDescriptor {
        name: "x64_crypto",
        suffix: "x64_crypto",
        token_path: "archmage::X64CryptoToken",
        as_method: "as_x64_crypto",
        target_arch: Some("x86_64"),
        cargo_feature: None,
        priority: 25,
    },
    TierDescriptor {
        name: "v2",
        suffix: "v2",
        token_path: "archmage::X64V2Token",
        as_method: "as_x64v2",
        target_arch: Some("x86_64"),
        cargo_feature: None,
        priority: 20,
    },
    TierDescriptor {
        name: "v1",
        suffix: "v1",
        token_path: "archmage::X64V1Token",
        as_method: "as_x64v1",
        target_arch: Some("x86_64"),
        cargo_feature: None,
        priority: 10,
    },
    // ARM: highest to lowest
    TierDescriptor {
        name: "arm_v3",
        suffix: "arm_v3",
        token_path: "archmage::Arm64V3Token",
        as_method: "as_arm_v3",
        target_arch: Some("aarch64"),
        cargo_feature: None,
        priority: 50,
    },
    TierDescriptor {
        name: "arm_v2",
        suffix: "arm_v2",
        token_path: "archmage::Arm64V2Token",
        as_method: "as_arm_v2",
        target_arch: Some("aarch64"),
        cargo_feature: None,
        priority: 40,
    },
    TierDescriptor {
        name: "neon_aes",
        suffix: "neon_aes",
        token_path: "archmage::NeonAesToken",
        as_method: "as_neon_aes",
        target_arch: Some("aarch64"),
        cargo_feature: None,
        priority: 30,
    },
    TierDescriptor {
        name: "neon_sha3",
        suffix: "neon_sha3",
        token_path: "archmage::NeonSha3Token",
        as_method: "as_neon_sha3",
        target_arch: Some("aarch64"),
        cargo_feature: None,
        priority: 30,
    },
    TierDescriptor {
        name: "neon_crc",
        suffix: "neon_crc",
        token_path: "archmage::NeonCrcToken",
        as_method: "as_neon_crc",
        target_arch: Some("aarch64"),
        cargo_feature: None,
        priority: 30,
    },
    TierDescriptor {
        name: "neon",
        suffix: "neon",
        token_path: "archmage::NeonToken",
        as_method: "as_neon",
        target_arch: Some("aarch64"),
        cargo_feature: None,
        priority: 20,
    },
    // WASM
    TierDescriptor {
        name: "wasm128_relaxed",
        suffix: "wasm128_relaxed",
        token_path: "archmage::Wasm128RelaxedToken",
        as_method: "as_wasm128_relaxed",
        target_arch: Some("wasm32"),
        cargo_feature: None,
        priority: 21,
    },
    TierDescriptor {
        name: "wasm128",
        suffix: "wasm128",
        token_path: "archmage::Wasm128Token",
        as_method: "as_wasm128",
        target_arch: Some("wasm32"),
        cargo_feature: None,
        priority: 20,
    },
    // Scalar (always last)
    TierDescriptor {
        name: "scalar",
        suffix: "scalar",
        token_path: "archmage::ScalarToken",
        as_method: "as_scalar",
        target_arch: None,
        cargo_feature: None,
        priority: 0,
    },
];
1891
/// Default tiers (backwards-compatible with pre-explicit behavior).
///
/// Used by `#[magetypes]` and `incant!` when the caller does not name tiers
/// explicitly. `scalar` is listed for clarity; `resolve_tiers` would append
/// it regardless.
const DEFAULT_TIER_NAMES: &[&str] = &["v4", "v3", "neon", "wasm128", "scalar"];
1894
1895/// Look up a tier by name, returning an error on unknown names.
1896fn find_tier(name: &str) -> Option<&'static TierDescriptor> {
1897    ALL_TIERS.iter().find(|t| t.name == name)
1898}
1899
1900/// Resolve tier names to descriptors, sorted by dispatch priority (highest first).
1901/// Always appends "scalar" if not already present.
1902fn resolve_tiers(
1903    tier_names: &[String],
1904    error_span: proc_macro2::Span,
1905) -> syn::Result<Vec<&'static TierDescriptor>> {
1906    let mut tiers = Vec::new();
1907    for name in tier_names {
1908        match find_tier(name) {
1909            Some(tier) => tiers.push(tier),
1910            None => {
1911                let known: Vec<&str> = ALL_TIERS.iter().map(|t| t.name).collect();
1912                return Err(syn::Error::new(
1913                    error_span,
1914                    format!("unknown tier `{}`. Known tiers: {}", name, known.join(", ")),
1915                ));
1916            }
1917        }
1918    }
1919
1920    // Always include scalar fallback
1921    if !tiers.iter().any(|t| t.name == "scalar") {
1922        tiers.push(find_tier("scalar").unwrap());
1923    }
1924
1925    // Sort by priority (highest first) for correct dispatch order
1926    tiers.sort_by(|a, b| b.priority.cmp(&a.priority));
1927
1928    Ok(tiers)
1929}
1930
1931// =============================================================================
1932// incant! macro - dispatch to platform-specific variants
1933// =============================================================================
1934
/// Parsed input for the incant! macro.
///
/// Grammar: `path(args...) [with token_expr] [, [tier, ...]]`
struct IncantInput {
    /// Function path to call (e.g. `func` or `module::func`)
    func_path: syn::Path,
    /// Arguments to pass through to the selected variant
    args: Vec<syn::Expr>,
    /// Optional token expression for passthrough mode (`with token`)
    with_token: Option<syn::Expr>,
    /// Optional explicit tier list plus its bracket span, used for error
    /// reporting (None = default tiers)
    tiers: Option<(Vec<String>, proc_macro2::Span)>,
}
1946
1947/// Create a suffixed version of a function path.
1948/// e.g. `module::func` + `"v3"` → `module::func_v3`
1949fn suffix_path(path: &syn::Path, suffix: &str) -> syn::Path {
1950    let mut suffixed = path.clone();
1951    if let Some(last) = suffixed.segments.last_mut() {
1952        last.ident = format_ident!("{}_{}", last.ident, suffix);
1953    }
1954    suffixed
1955}
1956
1957impl Parse for IncantInput {
1958    fn parse(input: ParseStream) -> syn::Result<Self> {
1959        // Parse: function_path(arg1, arg2, ...) [with token_expr] [, [tier1, tier2, ...]]
1960        let func_path: syn::Path = input.parse()?;
1961
1962        // Parse parenthesized arguments
1963        let content;
1964        syn::parenthesized!(content in input);
1965        let args = content
1966            .parse_terminated(syn::Expr::parse, Token![,])?
1967            .into_iter()
1968            .collect();
1969
1970        // Check for optional "with token"
1971        let with_token = if input.peek(Ident) {
1972            let kw: Ident = input.parse()?;
1973            if kw != "with" {
1974                return Err(syn::Error::new_spanned(kw, "expected `with` keyword"));
1975            }
1976            Some(input.parse()?)
1977        } else {
1978            None
1979        };
1980
1981        // Check for optional tier list: , [tier1, tier2, ...]
1982        let tiers = if input.peek(Token![,]) {
1983            let _: Token![,] = input.parse()?;
1984            let bracket_content;
1985            let bracket = syn::bracketed!(bracket_content in input);
1986            let tier_idents = bracket_content.parse_terminated(Ident::parse, Token![,])?;
1987            let tier_names: Vec<String> = tier_idents.iter().map(|i| i.to_string()).collect();
1988            Some((tier_names, bracket.span.join()))
1989        } else {
1990            None
1991        };
1992
1993        Ok(IncantInput {
1994            func_path,
1995            args,
1996            with_token,
1997            tiers,
1998        })
1999    }
2000}
2001
/// Dispatch to platform-specific SIMD variants.
///
/// # Entry Point Mode (no token yet)
///
/// Summons tokens and dispatches to the best available variant:
///
/// ```rust,ignore
/// pub fn public_api(data: &[f32]) -> f32 {
///     incant!(dot(data))
/// }
/// ```
///
/// Expands to runtime feature detection + dispatch to `dot_v3`, `dot_v4`,
/// `dot_neon`, `dot_wasm128`, or `dot_scalar`.
///
/// # Explicit Tiers
///
/// Specify which tiers to dispatch to:
///
/// ```rust,ignore
/// // Only dispatch to v1, v3, neon, and scalar
/// pub fn api(data: &[f32]) -> f32 {
///     incant!(process(data), [v1, v3, neon])
/// }
/// ```
///
/// `scalar` is always included implicitly. Unknown tier names cause a
/// compile error. Tiers are automatically sorted into correct dispatch
/// order (highest priority first).
///
/// Known tiers: `v1`, `v2`, `v3`, `v3_crypto`, `v4`, `v4x`, `x64_crypto`,
/// `arm_v2`, `arm_v3`, `neon`, `neon_aes`, `neon_sha3`, `neon_crc`,
/// `wasm128`, `wasm128_relaxed`, `scalar`.
///
/// # Passthrough Mode (already have token)
///
/// Uses compile-time dispatch via `IntoConcreteToken`:
///
/// ```rust,ignore
/// #[arcane]
/// fn outer(token: X64V3Token, data: &[f32]) -> f32 {
///     incant!(inner(data) with token)
/// }
/// ```
///
/// Also supports explicit tiers:
///
/// ```rust,ignore
/// fn inner<T: IntoConcreteToken>(token: T, data: &[f32]) -> f32 {
///     incant!(process(data) with token, [v3, neon])
/// }
/// ```
///
/// The compiler monomorphizes the dispatch, eliminating non-matching branches.
///
/// # Variant Naming
///
/// Functions must have suffixed variants matching the selected tiers:
/// - `_v1` for `X64V1Token`
/// - `_v2` for `X64V2Token`
/// - `_v3` for `X64V3Token`
/// - `_v3_crypto` for `X64V3CryptoToken`
/// - `_v4` for `X64V4Token` (requires `avx512` feature)
/// - `_v4x` for `X64V4xToken` (requires `avx512` feature)
/// - `_x64_crypto` for `X64CryptoToken`
/// - `_arm_v2` for `Arm64V2Token`
/// - `_arm_v3` for `Arm64V3Token`
/// - `_neon` for `NeonToken`
/// - `_neon_aes` for `NeonAesToken`
/// - `_neon_sha3` for `NeonSha3Token`
/// - `_neon_crc` for `NeonCrcToken`
/// - `_wasm128` for `Wasm128Token`
/// - `_wasm128_relaxed` for `Wasm128RelaxedToken`
/// - `_scalar` for `ScalarToken`
#[proc_macro]
pub fn incant(input: TokenStream) -> TokenStream {
    let input = parse_macro_input!(input as IncantInput);
    incant_impl(input)
}
2075
2076/// Legacy alias for [`incant!`].
2077#[proc_macro]
2078pub fn simd_route(input: TokenStream) -> TokenStream {
2079    let input = parse_macro_input!(input as IncantInput);
2080    incant_impl(input)
2081}
2082
2083/// Descriptive alias for [`incant!`].
2084///
2085/// Dispatches to architecture-specific function variants at runtime.
2086/// Looks for suffixed functions (`_v3`, `_v4`, `_neon`, `_wasm128`, `_scalar`)
2087/// and calls the best one the CPU supports.
2088///
2089/// See [`incant!`] for full documentation and examples.
2090#[proc_macro]
2091pub fn dispatch_variant(input: TokenStream) -> TokenStream {
2092    let input = parse_macro_input!(input as IncantInput);
2093    incant_impl(input)
2094}
2095
2096fn incant_impl(input: IncantInput) -> TokenStream {
2097    let func_path = &input.func_path;
2098    let args = &input.args;
2099
2100    // Resolve tiers
2101    let tier_names: Vec<String> = match &input.tiers {
2102        Some((names, _)) => names.clone(),
2103        None => DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect(),
2104    };
2105    let last_segment_span = func_path
2106        .segments
2107        .last()
2108        .map(|s| s.ident.span())
2109        .unwrap_or_else(proc_macro2::Span::call_site);
2110    let error_span = input
2111        .tiers
2112        .as_ref()
2113        .map(|(_, span)| *span)
2114        .unwrap_or(last_segment_span);
2115
2116    let tiers = match resolve_tiers(&tier_names, error_span) {
2117        Ok(t) => t,
2118        Err(e) => return e.to_compile_error().into(),
2119    };
2120
2121    // Group tiers by architecture for cfg-guarded blocks
2122    // Within each arch, tiers are already sorted by priority (highest first)
2123    if let Some(token_expr) = &input.with_token {
2124        gen_incant_passthrough(func_path, args, token_expr, &tiers)
2125    } else {
2126        gen_incant_entry(func_path, args, &tiers)
2127    }
2128}
2129
/// Generate incant! passthrough mode (already have a token).
///
/// Emits a labeled block (`'__incant: { ... }`) that tries each tier's
/// `IntoConcreteToken` downcast (`as_x64v3()`, `as_neon()`, ...) on the
/// caller's token expression, breaking out with a call to the matching
/// suffixed function on the first hit. Non-scalar tiers are grouped by
/// `(target_arch, cargo_feature)` so each group is emitted under a single
/// cfg guard; scalar is always checked last as the fallback.
fn gen_incant_passthrough(
    func_path: &syn::Path,
    args: &[syn::Expr],
    token_expr: &syn::Expr,
    tiers: &[&TierDescriptor],
) -> TokenStream {
    let mut dispatch_arms = Vec::new();

    // Group non-scalar tiers by (target_arch, cargo_feature) for nested cfg blocks
    let mut arch_groups: Vec<(Option<&str>, Option<&str>, Vec<&TierDescriptor>)> = Vec::new();
    for tier in tiers {
        if tier.name == "scalar" {
            continue; // Handle scalar separately at the end
        }
        let key = (tier.target_arch, tier.cargo_feature);
        if let Some(group) = arch_groups.iter_mut().find(|(a, f, _)| (*a, *f) == key) {
            group.2.push(tier);
        } else {
            arch_groups.push((tier.target_arch, tier.cargo_feature, vec![tier]));
        }
    }

    for (target_arch, cargo_feature, group_tiers) in &arch_groups {
        let mut tier_checks = Vec::new();
        for tier in group_tiers {
            let fn_suffixed = suffix_path(func_path, tier.suffix);
            let as_method = format_ident!("{}", tier.as_method);
            // Tiers within a group are already in priority order
            // (resolve_tiers sorted them highest-first).
            tier_checks.push(quote! {
                if let Some(__t) = __incant_token.#as_method() {
                    break '__incant #fn_suffixed(__t, #(#args),*);
                }
            });
        }

        let inner = quote! { #(#tier_checks)* };

        // Wrap the group's checks in whichever cfg guard its requirements demand.
        let guarded = match (target_arch, cargo_feature) {
            (Some(arch), Some(feat)) => quote! {
                #[cfg(target_arch = #arch)]
                {
                    #[cfg(feature = #feat)]
                    { #inner }
                }
            },
            (Some(arch), None) => quote! {
                #[cfg(target_arch = #arch)]
                { #inner }
            },
            (None, Some(feat)) => quote! {
                #[cfg(feature = #feat)]
                { #inner }
            },
            (None, None) => inner,
        };

        dispatch_arms.push(guarded);
    }

    // Scalar fallback (always last)
    let fn_scalar = suffix_path(func_path, "scalar");
    let scalar_arm = if tiers.iter().any(|t| t.name == "scalar") {
        quote! {
            if let Some(__t) = __incant_token.as_scalar() {
                break '__incant #fn_scalar(__t, #(#args),*);
            }
            unreachable!("Token did not match any known variant")
        }
    } else {
        // Defensive: resolve_tiers always appends scalar, so this arm is
        // not expected to be reached in practice.
        quote! { unreachable!("Token did not match any known variant") }
    };

    let expanded = quote! {
        '__incant: {
            use archmage::IntoConcreteToken;
            let __incant_token = #token_expr;
            #(#dispatch_arms)*
            #scalar_arm
        }
    };
    expanded.into()
}
2212
/// Generate incant! entry point mode (summon tokens).
///
/// Emits a labeled block that performs runtime feature detection: for each
/// architecture group it tries `TokenType::summon()` in priority order and
/// breaks out with a call to the matching suffixed function on the first
/// success. Feature-gated tiers are additionally wrapped in an inner
/// `#[cfg(feature = ...)]` block. If no token summons, the scalar variant
/// is called with `archmage::ScalarToken` as the block's final expression.
fn gen_incant_entry(
    func_path: &syn::Path,
    args: &[syn::Expr],
    tiers: &[&TierDescriptor],
) -> TokenStream {
    let mut dispatch_arms = Vec::new();

    // Group non-scalar tiers by target_arch for cfg blocks.
    // Within each arch group, further split by cargo_feature.
    let mut arch_groups: Vec<(Option<&str>, Vec<&TierDescriptor>)> = Vec::new();
    for tier in tiers {
        if tier.name == "scalar" {
            continue;
        }
        if let Some(group) = arch_groups.iter_mut().find(|(a, _)| *a == tier.target_arch) {
            group.1.push(tier);
        } else {
            arch_groups.push((tier.target_arch, vec![tier]));
        }
    }

    for (target_arch, group_tiers) in &arch_groups {
        let mut tier_checks = Vec::new();
        for tier in group_tiers {
            let fn_suffixed = suffix_path(func_path, tier.suffix);
            // token_path is a compile-time constant from ALL_TIERS, so
            // parsing it cannot fail in practice.
            let token_path: syn::Path = syn::parse_str(tier.token_path).unwrap();

            let check = quote! {
                if let Some(__t) = #token_path::summon() {
                    break '__incant #fn_suffixed(__t, #(#args),*);
                }
            };

            // Feature-gated tiers only dispatch when the cargo feature is enabled.
            if let Some(feat) = tier.cargo_feature {
                tier_checks.push(quote! {
                    #[cfg(feature = #feat)]
                    { #check }
                });
            } else {
                tier_checks.push(check);
            }
        }

        let inner = quote! { #(#tier_checks)* };

        if let Some(arch) = target_arch {
            dispatch_arms.push(quote! {
                #[cfg(target_arch = #arch)]
                { #inner }
            });
        } else {
            dispatch_arms.push(inner);
        }
    }

    // Scalar fallback
    let fn_scalar = suffix_path(func_path, "scalar");

    let expanded = quote! {
        '__incant: {
            use archmage::SimdToken;
            #(#dispatch_arms)*
            #fn_scalar(archmage::ScalarToken, #(#args),*)
        }
    };
    expanded.into()
}
2281
2282// =============================================================================
2283// autoversion - combined variant generation + dispatch
2284// =============================================================================
2285
/// Arguments to the `#[autoversion]` macro.
///
/// Accepts a comma-separated mix of tier names and an optional
/// `_self = Type` binding (see the `Parse` impl in this file).
struct AutoversionArgs {
    /// The concrete type to use for `self` receiver (inherent methods only).
    self_type: Option<Type>,
    /// Explicit tier names (None = default tiers).
    tiers: Option<Vec<String>>,
}
2293
2294impl Parse for AutoversionArgs {
2295    fn parse(input: ParseStream) -> syn::Result<Self> {
2296        let mut self_type = None;
2297        let mut tier_names = Vec::new();
2298
2299        while !input.is_empty() {
2300            let ident: Ident = input.parse()?;
2301            if ident == "_self" {
2302                let _: Token![=] = input.parse()?;
2303                self_type = Some(input.parse()?);
2304            } else {
2305                // Treat as tier name — validated later by resolve_tiers
2306                tier_names.push(ident.to_string());
2307            }
2308            if input.peek(Token![,]) {
2309                let _: Token![,] = input.parse()?;
2310            }
2311        }
2312
2313        Ok(AutoversionArgs {
2314            self_type,
2315            tiers: if tier_names.is_empty() {
2316                None
2317            } else {
2318                Some(tier_names)
2319            },
2320        })
2321    }
2322}
2323
/// Information about the `SimdToken` parameter found in a function signature.
struct SimdTokenParamInfo {
    /// Index of the parameter in `sig.inputs` (enumeration includes any receiver)
    index: usize,
    /// The parameter identifier (synthesized for wildcard `_` patterns)
    #[allow(dead_code)]
    ident: Ident,
}
2332
2333/// Find the `SimdToken` parameter in a function signature.
2334///
2335/// Searches all typed parameters for one whose type path ends in `SimdToken`.
2336/// Returns the parameter index and identifier, or `None` if not found.
2337fn find_simd_token_param(sig: &Signature) -> Option<SimdTokenParamInfo> {
2338    for (i, arg) in sig.inputs.iter().enumerate() {
2339        if let FnArg::Typed(PatType { pat, ty, .. }) = arg
2340            && let Type::Path(type_path) = ty.as_ref()
2341            && let Some(seg) = type_path.path.segments.last()
2342            && seg.ident == "SimdToken"
2343        {
2344            let ident = match pat.as_ref() {
2345                syn::Pat::Ident(pi) => pi.ident.clone(),
2346                syn::Pat::Wild(w) => Ident::new("__autoversion_token", w.underscore_token.span),
2347                _ => continue,
2348            };
2349            return Some(SimdTokenParamInfo { index: i, ident });
2350        }
2351    }
2352    None
2353}
2354
2355/// Core implementation for `#[autoversion]`.
2356///
2357/// Generates suffixed SIMD variants (like `#[magetypes]`) and a runtime
2358/// dispatcher function (like `incant!`) from a single annotated function.
2359fn autoversion_impl(mut input_fn: LightFn, args: AutoversionArgs) -> TokenStream {
2360    // Check for self receiver
2361    let has_self = input_fn
2362        .sig
2363        .inputs
2364        .first()
2365        .is_some_and(|arg| matches!(arg, FnArg::Receiver(_)));
2366
2367    // _self = Type is only needed for trait impls (nested mode in #[arcane]).
2368    // For inherent methods, self/Self work naturally in sibling mode.
2369
2370    // Find SimdToken parameter
2371    let token_param = match find_simd_token_param(&input_fn.sig) {
2372        Some(p) => p,
2373        None => {
2374            return syn::Error::new_spanned(
2375                &input_fn.sig,
2376                "autoversion requires a `SimdToken` parameter.\n\
2377                 Example: fn process(token: SimdToken, data: &[f32]) -> f32 { ... }\n\n\
2378                 SimdToken is the dispatch placeholder — autoversion replaces it \
2379                 with concrete token types and generates a runtime dispatcher.",
2380            )
2381            .to_compile_error()
2382            .into();
2383        }
2384    };
2385
2386    // Resolve tiers
2387    let tier_names: Vec<String> = match &args.tiers {
2388        Some(names) => names.clone(),
2389        None => DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect(),
2390    };
2391    let tiers = match resolve_tiers(&tier_names, input_fn.sig.ident.span()) {
2392        Ok(t) => t,
2393        Err(e) => return e.to_compile_error().into(),
2394    };
2395
2396    // Strip #[arcane] / #[rite] to prevent double-wrapping
2397    input_fn
2398        .attrs
2399        .retain(|attr| !attr.path().is_ident("arcane") && !attr.path().is_ident("rite"));
2400
2401    let fn_name = &input_fn.sig.ident;
2402    let vis = input_fn.vis.clone();
2403
2404    // Move attrs to dispatcher only; variants get no user attrs
2405    let fn_attrs: Vec<Attribute> = input_fn.attrs.drain(..).collect();
2406
2407    // =========================================================================
2408    // Generate suffixed variants
2409    // =========================================================================
2410    //
2411    // AST manipulation only — we clone the parsed LightFn and swap the token
2412    // param's type annotation. No serialize/reparse round-trip. The body is
2413    // never touched unless _self = Type requires a `let _self = self;`
2414    // preamble on the scalar variant.
2415
2416    let mut variants = Vec::new();
2417
2418    for tier in &tiers {
2419        let mut variant_fn = input_fn.clone();
2420
2421        // Variants are always private — only the dispatcher is public.
2422        variant_fn.vis = syn::Visibility::Inherited;
2423
2424        // Rename: process → process_v3
2425        variant_fn.sig.ident = format_ident!("{}_{}", fn_name, tier.suffix);
2426
2427        // Replace SimdToken param type with concrete token type
2428        let concrete_type: Type = syn::parse_str(tier.token_path).unwrap();
2429        if let FnArg::Typed(pt) = &mut variant_fn.sig.inputs[token_param.index] {
2430            *pt.ty = concrete_type;
2431        }
2432
2433        // Scalar with _self = Type: inject `let _self = self;` preamble so body's
2434        // _self references resolve (non-scalar variants get this from #[arcane(_self = Type)])
2435        if tier.name == "scalar" && has_self && args.self_type.is_some() {
2436            let original_body = variant_fn.body.clone();
2437            variant_fn.body = quote!(let _self = self; #original_body);
2438        }
2439
2440        // cfg guard
2441        let cfg_guard = match (tier.target_arch, tier.cargo_feature) {
2442            (Some(arch), Some(feature)) => {
2443                quote! { #[cfg(all(target_arch = #arch, feature = #feature))] }
2444            }
2445            (Some(arch), None) => quote! { #[cfg(target_arch = #arch)] },
2446            (None, Some(feature)) => quote! { #[cfg(feature = #feature)] },
2447            (None, None) => quote! {},
2448        };
2449
2450        if tier.name != "scalar" {
2451            // Non-scalar: add #[arcane] (with _self if needed)
2452            let arcane_attr = if let Some(ref self_type) = args.self_type {
2453                quote! { #[archmage::arcane(_self = #self_type)] }
2454            } else {
2455                quote! { #[archmage::arcane] }
2456            };
2457            variants.push(quote! {
2458                #cfg_guard
2459                #arcane_attr
2460                #variant_fn
2461            });
2462        } else {
2463            variants.push(quote! {
2464                #cfg_guard
2465                #variant_fn
2466            });
2467        }
2468    }
2469
2470    // =========================================================================
2471    // Generate dispatcher (adapted from gen_incant_entry)
2472    // =========================================================================
2473
2474    // Build dispatcher inputs: original params minus SimdToken
2475    let mut dispatcher_inputs: Vec<FnArg> = input_fn.sig.inputs.iter().cloned().collect();
2476    dispatcher_inputs.remove(token_param.index);
2477
2478    // Rename wildcard params so we can pass them as arguments
2479    let mut wild_counter = 0u32;
2480    for arg in &mut dispatcher_inputs {
2481        if let FnArg::Typed(pat_type) = arg
2482            && matches!(pat_type.pat.as_ref(), syn::Pat::Wild(_))
2483        {
2484            let ident = format_ident!("__autoversion_wild_{}", wild_counter);
2485            wild_counter += 1;
2486            *pat_type.pat = syn::Pat::Ident(syn::PatIdent {
2487                attrs: vec![],
2488                by_ref: None,
2489                mutability: None,
2490                ident,
2491                subpat: None,
2492            });
2493        }
2494    }
2495
2496    // Collect argument idents for dispatch calls (exclude self receiver)
2497    let dispatch_args: Vec<Ident> = dispatcher_inputs
2498        .iter()
2499        .filter_map(|arg| {
2500            if let FnArg::Typed(PatType { pat, .. }) = arg
2501                && let syn::Pat::Ident(pi) = pat.as_ref()
2502            {
2503                return Some(pi.ident.clone());
2504            }
2505            None
2506        })
2507        .collect();
2508
2509    // Build turbofish for forwarding type/const generics to variant calls
2510    let turbofish = build_turbofish(&input_fn.sig.generics);
2511
2512    // Group non-scalar tiers by target_arch for cfg blocks
2513    let mut arch_groups: Vec<(Option<&str>, Vec<&&TierDescriptor>)> = Vec::new();
2514    for tier in &tiers {
2515        if tier.name == "scalar" {
2516            continue;
2517        }
2518        if let Some(group) = arch_groups.iter_mut().find(|(a, _)| *a == tier.target_arch) {
2519            group.1.push(tier);
2520        } else {
2521            arch_groups.push((tier.target_arch, vec![tier]));
2522        }
2523    }
2524
2525    let mut dispatch_arms = Vec::new();
2526    for (target_arch, group_tiers) in &arch_groups {
2527        let mut tier_checks = Vec::new();
2528        for tier in group_tiers {
2529            let suffixed = format_ident!("{}_{}", fn_name, tier.suffix);
2530            let token_path: syn::Path = syn::parse_str(tier.token_path).unwrap();
2531
2532            let call = if has_self {
2533                quote! { self.#suffixed #turbofish(__t, #(#dispatch_args),*) }
2534            } else {
2535                quote! { #suffixed #turbofish(__t, #(#dispatch_args),*) }
2536            };
2537
2538            let check = quote! {
2539                if let Some(__t) = #token_path::summon() {
2540                    break '__dispatch #call;
2541                }
2542            };
2543
2544            if let Some(feat) = tier.cargo_feature {
2545                tier_checks.push(quote! {
2546                    #[cfg(feature = #feat)]
2547                    { #check }
2548                });
2549            } else {
2550                tier_checks.push(check);
2551            }
2552        }
2553
2554        let inner = quote! { #(#tier_checks)* };
2555
2556        if let Some(arch) = target_arch {
2557            dispatch_arms.push(quote! {
2558                #[cfg(target_arch = #arch)]
2559                { #inner }
2560            });
2561        } else {
2562            dispatch_arms.push(inner);
2563        }
2564    }
2565
2566    // Scalar fallback (always available, no summon needed)
2567    let scalar_name = format_ident!("{}_scalar", fn_name);
2568    let scalar_call = if has_self {
2569        quote! { self.#scalar_name #turbofish(archmage::ScalarToken, #(#dispatch_args),*) }
2570    } else {
2571        quote! { #scalar_name #turbofish(archmage::ScalarToken, #(#dispatch_args),*) }
2572    };
2573
2574    // Build dispatcher function
2575    let dispatcher_inputs_punct: syn::punctuated::Punctuated<FnArg, Token![,]> =
2576        dispatcher_inputs.into_iter().collect();
2577    let output = &input_fn.sig.output;
2578    let generics = &input_fn.sig.generics;
2579    let where_clause = &generics.where_clause;
2580
2581    let dispatcher = quote! {
2582        #(#fn_attrs)*
2583        #vis fn #fn_name #generics (#dispatcher_inputs_punct) #output #where_clause {
2584            '__dispatch: {
2585                use archmage::SimdToken;
2586                #(#dispatch_arms)*
2587                #scalar_call
2588            }
2589        }
2590    };
2591
2592    let expanded = quote! {
2593        #dispatcher
2594        #(#variants)*
2595    };
2596
2597    expanded.into()
2598}
2599
2600/// Let the compiler auto-vectorize scalar code for each architecture.
2601///
2602/// Write a plain scalar function with a `SimdToken` placeholder parameter.
2603/// `#[autoversion]` generates architecture-specific copies — each compiled
2604/// with different `#[target_feature]` flags via `#[arcane]` — plus a runtime
2605/// dispatcher that calls the best one the CPU supports.
2606///
2607/// You don't touch intrinsics, don't import SIMD types, don't think about
2608/// lane widths. The compiler's auto-vectorizer does the work; you give it
2609/// permission via `#[target_feature]`, which `#[autoversion]` handles.
2610///
2611/// # The simple win
2612///
2613/// ```rust,ignore
2614/// use archmage::SimdToken;
2615///
2616/// #[autoversion]
2617/// fn sum_of_squares(_token: SimdToken, data: &[f32]) -> f32 {
2618///     let mut sum = 0.0f32;
2619///     for &x in data {
2620///         sum += x * x;
2621///     }
2622///     sum
2623/// }
2624///
2625/// // Call directly — no token, no unsafe:
2626/// let result = sum_of_squares(&my_data);
2627/// ```
2628///
2629/// The `_token` parameter is never used in the body. It exists so the macro
2630/// knows where to substitute concrete token types. Each generated variant
2631/// gets `#[arcane]` → `#[target_feature(enable = "avx2,fma,...")]`, which
2632/// unlocks the compiler's auto-vectorizer for that feature set.
2633///
2634/// On x86-64 with the `_v3` variant (AVX2+FMA), that loop compiles to
2635/// `vfmadd231ps` — fused multiply-add on 8 floats per cycle. On aarch64
2636/// with NEON, you get `fmla`. The `_scalar` fallback compiles without any
2637/// SIMD target features, as a safety net for unknown hardware.
2638///
2639/// # Chunks + remainder
2640///
2641/// The classic data-processing pattern works naturally:
2642///
2643/// ```rust,ignore
2644/// #[autoversion]
2645/// fn normalize(_token: SimdToken, data: &mut [f32], scale: f32) {
2646///     // Compiler auto-vectorizes this — no manual SIMD needed.
2647///     // On v3, this becomes vdivps + vmulps on 8 floats at a time.
2648///     for x in data.iter_mut() {
2649///         *x = (*x - 128.0) * scale;
2650///     }
2651/// }
2652/// ```
2653///
2654/// If you want explicit control over chunk boundaries (e.g., for
2655/// accumulator patterns), that works too:
2656///
2657/// ```rust,ignore
2658/// #[autoversion]
2659/// fn dot_product(_token: SimdToken, a: &[f32], b: &[f32]) -> f32 {
2660///     let n = a.len().min(b.len());
2661///     let mut sum = 0.0f32;
2662///     for i in 0..n {
2663///         sum += a[i] * b[i];
2664///     }
2665///     sum
2666/// }
2667/// ```
2668///
2669/// The compiler decides the chunk size based on the target features of each
2670/// variant (8 floats for AVX2, 4 for NEON, 1 for scalar).
2671///
2672/// # What gets generated
2673///
2674/// With default tiers, `#[autoversion] fn process(_t: SimdToken, data: &[f32]) -> f32`
2675/// expands to:
2676///
2677/// - `process_v4(token: X64V4Token, ...)` — AVX-512 (behind `#[cfg(feature = "avx512")]`)
2678/// - `process_v3(token: X64V3Token, ...)` — AVX2+FMA
2679/// - `process_neon(token: NeonToken, ...)` — aarch64 NEON
2680/// - `process_wasm128(token: Wasm128Token, ...)` — WASM SIMD
2681/// - `process_scalar(token: ScalarToken, ...)` — no SIMD, always available
2682/// - `process(data: &[f32]) -> f32` — **dispatcher** (SimdToken param removed)
2683///
2684/// Each non-scalar variant is wrapped in `#[arcane]` (for `#[target_feature]`)
2685/// and `#[cfg(target_arch = ...)]`. The dispatcher does runtime CPU feature
2686/// detection via `Token::summon()` and calls the best match. When compiled
2687/// with `-C target-cpu=native`, the detection is elided by the compiler.
2688///
2689/// The suffixed variants are private sibling functions — only the dispatcher
2690/// is public. Within the same module, you can call them directly for testing
2691/// or benchmarking.
2692///
2693/// # SimdToken replacement
2694///
2695/// `#[autoversion]` replaces the `SimdToken` type annotation in the function
2696/// signature with the concrete token type for each variant (e.g.,
2697/// `archmage::X64V3Token`). Only the parameter's type changes — the function
2698/// body is never reparsed, which keeps compile times low.
2699///
2700/// The token variable (whatever you named it — `token`, `_token`, `_t`)
2701/// keeps working in the body because its type comes from the signature.
2702/// So `f32x8::from_array(token, ...)` works — `token` is now an `X64V3Token`
2703/// which satisfies the same trait bounds as `SimdToken`.
2704///
2705/// `#[magetypes]` takes a different approach: it replaces the text `Token`
2706/// everywhere in the function — signature and body — via string substitution.
2707/// Use `#[magetypes]` when you need body-level type substitution (e.g.,
2708/// `Token`-dependent constants or type aliases that differ per variant).
2709/// Use `#[autoversion]` when you want compiler auto-vectorization of scalar
2710/// code with zero boilerplate.
2711///
2712/// # Benchmarking
2713///
2714/// Measure the speedup with a side-by-side comparison. The generated
2715/// `_scalar` variant serves as the baseline; the dispatcher picks the
2716/// best available:
2717///
2718/// ```rust,ignore
2719/// use criterion::{Criterion, black_box, criterion_group, criterion_main};
2720/// use archmage::SimdToken;
2721///
2722/// #[autoversion]
2723/// fn sum_squares(_token: SimdToken, data: &[f32]) -> f32 {
2724///     data.iter().map(|&x| x * x).fold(0.0f32, |a, b| a + b)
2725/// }
2726///
2727/// fn bench(c: &mut Criterion) {
2728///     let data: Vec<f32> = (0..4096).map(|i| i as f32 * 0.01).collect();
2729///     let mut group = c.benchmark_group("sum_squares");
2730///
2731///     // Dispatched — picks best available at runtime
2732///     group.bench_function("dispatched", |b| {
2733///         b.iter(|| sum_squares(black_box(&data)))
2734///     });
2735///
2736///     // Scalar baseline — no target_feature, no auto-vectorization
2737///     group.bench_function("scalar", |b| {
2738///         b.iter(|| sum_squares_scalar(archmage::ScalarToken, black_box(&data)))
2739///     });
2740///
2741///     // Specific tier (useful for isolating which tier wins)
2742///     #[cfg(target_arch = "x86_64")]
2743///     if let Some(t) = archmage::X64V3Token::summon() {
2744///         group.bench_function("v3_avx2_fma", |b| {
2745///             b.iter(|| sum_squares_v3(t, black_box(&data)));
2746///         });
2747///     }
2748///
2749///     group.finish();
2750/// }
2751///
2752/// criterion_group!(benches, bench);
2753/// criterion_main!(benches);
2754/// ```
2755///
2756/// For a tight numeric loop on x86-64, the `_v3` variant (AVX2+FMA)
2757/// typically runs 4-8x faster than `_scalar` because `#[target_feature]`
2758/// unlocks auto-vectorization that the baseline build can't use.
2759///
2760/// # Explicit tiers
2761///
2762/// ```rust,ignore
2763/// #[autoversion(v3, v4, v4x, neon, arm_v2, wasm128)]
2764/// fn process(_token: SimdToken, data: &[f32]) -> f32 {
2765///     // ...
2766/// }
2767/// ```
2768///
2769/// `scalar` is always included implicitly.
2770///
2771/// Default tiers (when no list given): `v4`, `v3`, `neon`, `wasm128`, `scalar`.
2772///
2773/// Known tiers: `v1`, `v2`, `v3`, `v3_crypto`, `v4`, `v4x`, `neon`,
2774/// `neon_aes`, `neon_sha3`, `neon_crc`, `arm_v2`, `arm_v3`, `wasm128`,
2775/// `wasm128_relaxed`, `x64_crypto`, `scalar`.
2776///
2777/// # Methods with self receivers
2778///
2779/// For inherent methods, `self` works naturally — no `_self` needed:
2780///
2781/// ```rust,ignore
2782/// impl ImageBuffer {
2783///     #[autoversion]
2784///     fn normalize(&mut self, token: SimdToken, gamma: f32) {
2785///         for pixel in &mut self.data {
2786///             *pixel = (*pixel / 255.0).powf(gamma);
2787///         }
2788///     }
2789/// }
2790///
2791/// // Call normally — no token:
2792/// buffer.normalize(2.2);
2793/// ```
2794///
2795/// All receiver types work: `self`, `&self`, `&mut self`. Non-scalar variants
2796/// get `#[arcane]` (sibling mode), where `self`/`Self` resolve naturally.
2797///
2798/// # Trait methods (requires `_self = Type`)
2799///
2800/// Trait methods can't use `#[autoversion]` directly because proc macro
2801/// attributes on trait impl items can't expand to multiple sibling functions.
2802/// Use the delegation pattern with `_self = Type`:
2803///
2804/// ```rust,ignore
2805/// trait Processor {
2806///     fn process(&self, data: &[f32]) -> f32;
2807/// }
2808///
2809/// impl Processor for MyType {
2810///     fn process(&self, data: &[f32]) -> f32 {
2811///         self.process_impl(data) // delegate to autoversioned method
2812///     }
2813/// }
2814///
2815/// impl MyType {
2816///     #[autoversion(_self = MyType)]
2817///     fn process_impl(&self, token: SimdToken, data: &[f32]) -> f32 {
2818///         _self.weights.iter().zip(data).map(|(w, d)| w * d).sum()
2819///     }
2820/// }
2821/// ```
2822///
2823/// `_self = Type` uses nested mode in `#[arcane]`, which is required for
2824/// trait impls. Use `_self` (not `self`) in the body when using this form.
2825///
2826/// # Comparison with `#[magetypes]` + `incant!`
2827///
2828/// | | `#[autoversion]` | `#[magetypes]` + `incant!` |
2829/// |---|---|---|
2830/// | Placeholder | `SimdToken` | `Token` |
2831/// | Generates variants | Yes | Yes (magetypes) |
2832/// | Generates dispatcher | Yes | No (you write `incant!`) |
2833/// | Best for | Scalar auto-vectorization | Explicit SIMD with typed vectors |
2834/// | Lines of code | 1 attribute | 2+ (magetypes + incant + arcane) |
2835///
2836/// Use `#[autoversion]` for scalar loops you want auto-vectorized. Use
2837/// `#[magetypes]` + `incant!` when you need `f32x8`, `u8x32`, and
/// hand-tuned SIMD code per architecture.
2839#[proc_macro_attribute]
2840pub fn autoversion(attr: TokenStream, item: TokenStream) -> TokenStream {
2841    let args = parse_macro_input!(attr as AutoversionArgs);
2842    let input_fn = parse_macro_input!(item as LightFn);
2843    autoversion_impl(input_fn, args)
2844}
2845
2846// =============================================================================
2847// Unit tests for token/trait recognition maps
2848// =============================================================================
2849
2850#[cfg(test)]
2851mod tests {
2852    use super::*;
2853
2854    use super::generated::{ALL_CONCRETE_TOKENS, ALL_TRAIT_NAMES};
2855    use syn::{ItemFn, ReturnType};
2856
2857    #[test]
2858    fn every_concrete_token_is_in_token_to_features() {
2859        for &name in ALL_CONCRETE_TOKENS {
2860            assert!(
2861                token_to_features(name).is_some(),
2862                "Token `{}` exists in runtime crate but is NOT recognized by \
2863                 token_to_features() in the proc macro. Add it!",
2864                name
2865            );
2866        }
2867    }
2868
2869    #[test]
2870    fn every_trait_is_in_trait_to_features() {
2871        for &name in ALL_TRAIT_NAMES {
2872            assert!(
2873                trait_to_features(name).is_some(),
2874                "Trait `{}` exists in runtime crate but is NOT recognized by \
2875                 trait_to_features() in the proc macro. Add it!",
2876                name
2877            );
2878        }
2879    }
2880
2881    #[test]
2882    fn token_aliases_map_to_same_features() {
2883        // Desktop64 = X64V3Token
2884        assert_eq!(
2885            token_to_features("Desktop64"),
2886            token_to_features("X64V3Token"),
2887            "Desktop64 and X64V3Token should map to identical features"
2888        );
2889
2890        // Server64 = X64V4Token = Avx512Token
2891        assert_eq!(
2892            token_to_features("Server64"),
2893            token_to_features("X64V4Token"),
2894            "Server64 and X64V4Token should map to identical features"
2895        );
2896        assert_eq!(
2897            token_to_features("X64V4Token"),
2898            token_to_features("Avx512Token"),
2899            "X64V4Token and Avx512Token should map to identical features"
2900        );
2901
2902        // Arm64 = NeonToken
2903        assert_eq!(
2904            token_to_features("Arm64"),
2905            token_to_features("NeonToken"),
2906            "Arm64 and NeonToken should map to identical features"
2907        );
2908    }
2909
2910    #[test]
2911    fn trait_to_features_includes_tokens_as_bounds() {
2912        // Tier tokens should also work as trait bounds
2913        // (for `impl X64V3Token` patterns, even though Rust won't allow it,
2914        // the macro processes AST before type checking)
2915        let tier_tokens = [
2916            "X64V2Token",
2917            "X64CryptoToken",
2918            "X64V3Token",
2919            "Desktop64",
2920            "Avx2FmaToken",
2921            "X64V4Token",
2922            "Avx512Token",
2923            "Server64",
2924            "X64V4xToken",
2925            "Avx512Fp16Token",
2926            "NeonToken",
2927            "Arm64",
2928            "NeonAesToken",
2929            "NeonSha3Token",
2930            "NeonCrcToken",
2931            "Arm64V2Token",
2932            "Arm64V3Token",
2933        ];
2934
2935        for &name in &tier_tokens {
2936            assert!(
2937                trait_to_features(name).is_some(),
2938                "Tier token `{}` should also be recognized in trait_to_features() \
2939                 for use as a generic bound. Add it!",
2940                name
2941            );
2942        }
2943    }
2944
2945    #[test]
2946    fn trait_features_are_cumulative() {
2947        // HasX64V4 should include all HasX64V2 features plus more
2948        let v2_features = trait_to_features("HasX64V2").unwrap();
2949        let v4_features = trait_to_features("HasX64V4").unwrap();
2950
2951        for &f in v2_features {
2952            assert!(
2953                v4_features.contains(&f),
2954                "HasX64V4 should include v2 feature `{}` but doesn't",
2955                f
2956            );
2957        }
2958
2959        // v4 should have more features than v2
2960        assert!(
2961            v4_features.len() > v2_features.len(),
2962            "HasX64V4 should have more features than HasX64V2"
2963        );
2964    }
2965
2966    #[test]
2967    fn x64v3_trait_features_include_v2() {
2968        // X64V3Token as trait bound should include v2 features
2969        let v2 = trait_to_features("HasX64V2").unwrap();
2970        let v3 = trait_to_features("X64V3Token").unwrap();
2971
2972        for &f in v2 {
2973            assert!(
2974                v3.contains(&f),
2975                "X64V3Token trait features should include v2 feature `{}` but don't",
2976                f
2977            );
2978        }
2979    }
2980
2981    #[test]
2982    fn has_neon_aes_includes_neon() {
2983        let neon = trait_to_features("HasNeon").unwrap();
2984        let neon_aes = trait_to_features("HasNeonAes").unwrap();
2985
2986        for &f in neon {
2987            assert!(
2988                neon_aes.contains(&f),
2989                "HasNeonAes should include NEON feature `{}`",
2990                f
2991            );
2992        }
2993    }
2994
2995    #[test]
2996    fn no_removed_traits_are_recognized() {
2997        // These traits were removed in 0.3.0 and should NOT be recognized
2998        let removed = [
2999            "HasSse",
3000            "HasSse2",
3001            "HasSse41",
3002            "HasSse42",
3003            "HasAvx",
3004            "HasAvx2",
3005            "HasFma",
3006            "HasAvx512f",
3007            "HasAvx512bw",
3008            "HasAvx512vl",
3009            "HasAvx512vbmi2",
3010            "HasSve",
3011            "HasSve2",
3012        ];
3013
3014        for &name in &removed {
3015            assert!(
3016                trait_to_features(name).is_none(),
3017                "Removed trait `{}` should NOT be in trait_to_features(). \
3018                 It was removed in 0.3.0 — users should migrate to tier traits.",
3019                name
3020            );
3021        }
3022    }
3023
3024    #[test]
3025    fn no_nonexistent_tokens_are_recognized() {
3026        // These tokens don't exist and should NOT be recognized
3027        let fake = [
3028            "SveToken",
3029            "Sve2Token",
3030            "Avx512VnniToken",
3031            "X64V4ModernToken",
3032            "NeonFp16Token",
3033        ];
3034
3035        for &name in &fake {
3036            assert!(
3037                token_to_features(name).is_none(),
3038                "Non-existent token `{}` should NOT be in token_to_features()",
3039                name
3040            );
3041        }
3042    }
3043
3044    #[test]
3045    fn featureless_traits_are_not_in_registries() {
3046        // SimdToken and IntoConcreteToken should NOT be in any feature registry
3047        // because they don't map to CPU features
3048        for &name in FEATURELESS_TRAIT_NAMES {
3049            assert!(
3050                token_to_features(name).is_none(),
3051                "`{}` should NOT be in token_to_features() — it has no CPU features",
3052                name
3053            );
3054            assert!(
3055                trait_to_features(name).is_none(),
3056                "`{}` should NOT be in trait_to_features() — it has no CPU features",
3057                name
3058            );
3059        }
3060    }
3061
3062    #[test]
3063    fn find_featureless_trait_detects_simdtoken() {
3064        let names = vec!["SimdToken".to_string()];
3065        assert_eq!(find_featureless_trait(&names), Some("SimdToken"));
3066
3067        let names = vec!["IntoConcreteToken".to_string()];
3068        assert_eq!(find_featureless_trait(&names), Some("IntoConcreteToken"));
3069
3070        // Feature-bearing traits should NOT be detected
3071        let names = vec!["HasX64V2".to_string()];
3072        assert_eq!(find_featureless_trait(&names), None);
3073
3074        let names = vec!["HasNeon".to_string()];
3075        assert_eq!(find_featureless_trait(&names), None);
3076
3077        // Mixed: if SimdToken is among real traits, still detected
3078        let names = vec!["SimdToken".to_string(), "HasX64V2".to_string()];
3079        assert_eq!(find_featureless_trait(&names), Some("SimdToken"));
3080    }
3081
3082    #[test]
3083    fn arm64_v2_v3_traits_are_cumulative() {
3084        let v2_features = trait_to_features("HasArm64V2").unwrap();
3085        let v3_features = trait_to_features("HasArm64V3").unwrap();
3086
3087        for &f in v2_features {
3088            assert!(
3089                v3_features.contains(&f),
3090                "HasArm64V3 should include v2 feature `{}` but doesn't",
3091                f
3092            );
3093        }
3094
3095        assert!(
3096            v3_features.len() > v2_features.len(),
3097            "HasArm64V3 should have more features than HasArm64V2"
3098        );
3099    }
3100
3101    // =========================================================================
3102    // autoversion — argument parsing
3103    // =========================================================================
3104
3105    #[test]
3106    fn autoversion_args_empty() {
3107        let args: AutoversionArgs = syn::parse_str("").unwrap();
3108        assert!(args.self_type.is_none());
3109        assert!(args.tiers.is_none());
3110    }
3111
3112    #[test]
3113    fn autoversion_args_single_tier() {
3114        let args: AutoversionArgs = syn::parse_str("v3").unwrap();
3115        assert!(args.self_type.is_none());
3116        assert_eq!(args.tiers.as_ref().unwrap(), &["v3"]);
3117    }
3118
3119    #[test]
3120    fn autoversion_args_tiers_only() {
3121        let args: AutoversionArgs = syn::parse_str("v3, v4, neon").unwrap();
3122        assert!(args.self_type.is_none());
3123        let tiers = args.tiers.unwrap();
3124        assert_eq!(tiers, vec!["v3", "v4", "neon"]);
3125    }
3126
3127    #[test]
3128    fn autoversion_args_many_tiers() {
3129        let args: AutoversionArgs =
3130            syn::parse_str("v1, v2, v3, v4, v4x, neon, arm_v2, wasm128").unwrap();
3131        assert_eq!(
3132            args.tiers.unwrap(),
3133            vec!["v1", "v2", "v3", "v4", "v4x", "neon", "arm_v2", "wasm128"]
3134        );
3135    }
3136
3137    #[test]
3138    fn autoversion_args_trailing_comma() {
3139        let args: AutoversionArgs = syn::parse_str("v3, v4,").unwrap();
3140        assert_eq!(args.tiers.as_ref().unwrap(), &["v3", "v4"]);
3141    }
3142
3143    #[test]
3144    fn autoversion_args_self_only() {
3145        let args: AutoversionArgs = syn::parse_str("_self = MyType").unwrap();
3146        assert!(args.self_type.is_some());
3147        assert!(args.tiers.is_none());
3148    }
3149
3150    #[test]
3151    fn autoversion_args_self_and_tiers() {
3152        let args: AutoversionArgs = syn::parse_str("_self = MyType, v3, neon").unwrap();
3153        assert!(args.self_type.is_some());
3154        let tiers = args.tiers.unwrap();
3155        assert_eq!(tiers, vec!["v3", "neon"]);
3156    }
3157
3158    #[test]
3159    fn autoversion_args_tiers_then_self() {
3160        // _self can appear after tier names
3161        let args: AutoversionArgs = syn::parse_str("v3, neon, _self = MyType").unwrap();
3162        assert!(args.self_type.is_some());
3163        let tiers = args.tiers.unwrap();
3164        assert_eq!(tiers, vec!["v3", "neon"]);
3165    }
3166
3167    #[test]
3168    fn autoversion_args_self_with_path_type() {
3169        let args: AutoversionArgs = syn::parse_str("_self = crate::MyType").unwrap();
3170        assert!(args.self_type.is_some());
3171        assert!(args.tiers.is_none());
3172    }
3173
3174    #[test]
3175    fn autoversion_args_self_with_generic_type() {
3176        let args: AutoversionArgs = syn::parse_str("_self = Vec<u8>").unwrap();
3177        assert!(args.self_type.is_some());
3178        let ty_str = args.self_type.unwrap().to_token_stream().to_string();
3179        assert!(ty_str.contains("Vec"), "Expected Vec<u8>, got: {}", ty_str);
3180    }
3181
3182    #[test]
3183    fn autoversion_args_self_trailing_comma() {
3184        let args: AutoversionArgs = syn::parse_str("_self = MyType,").unwrap();
3185        assert!(args.self_type.is_some());
3186        assert!(args.tiers.is_none());
3187    }
3188
3189    // =========================================================================
3190    // autoversion — find_simd_token_param
3191    // =========================================================================
3192
3193    #[test]
3194    fn find_simd_token_param_first_position() {
3195        let f: ItemFn =
3196            syn::parse_str("fn process(token: SimdToken, data: &[f32]) -> f32 {}").unwrap();
3197        let param = find_simd_token_param(&f.sig).unwrap();
3198        assert_eq!(param.index, 0);
3199        assert_eq!(param.ident, "token");
3200    }
3201
3202    #[test]
3203    fn find_simd_token_param_second_position() {
3204        let f: ItemFn =
3205            syn::parse_str("fn process(data: &[f32], token: SimdToken) -> f32 {}").unwrap();
3206        let param = find_simd_token_param(&f.sig).unwrap();
3207        assert_eq!(param.index, 1);
3208        assert_eq!(param.ident, "token");
3209    }
3210
3211    #[test]
3212    fn find_simd_token_param_underscore_prefix() {
3213        let f: ItemFn =
3214            syn::parse_str("fn process(_token: SimdToken, data: &[f32]) -> f32 {}").unwrap();
3215        let param = find_simd_token_param(&f.sig).unwrap();
3216        assert_eq!(param.index, 0);
3217        assert_eq!(param.ident, "_token");
3218    }
3219
3220    #[test]
3221    fn find_simd_token_param_wildcard() {
3222        let f: ItemFn = syn::parse_str("fn process(_: SimdToken, data: &[f32]) -> f32 {}").unwrap();
3223        let param = find_simd_token_param(&f.sig).unwrap();
3224        assert_eq!(param.index, 0);
3225        assert_eq!(param.ident, "__autoversion_token");
3226    }
3227
3228    #[test]
3229    fn find_simd_token_param_not_found() {
3230        let f: ItemFn = syn::parse_str("fn process(data: &[f32]) -> f32 {}").unwrap();
3231        assert!(find_simd_token_param(&f.sig).is_none());
3232    }
3233
3234    #[test]
3235    fn find_simd_token_param_no_params() {
3236        let f: ItemFn = syn::parse_str("fn process() {}").unwrap();
3237        assert!(find_simd_token_param(&f.sig).is_none());
3238    }
3239
3240    #[test]
3241    fn find_simd_token_param_concrete_token_not_matched() {
3242        // autoversion looks specifically for SimdToken, not concrete tokens
3243        let f: ItemFn =
3244            syn::parse_str("fn process(token: X64V3Token, data: &[f32]) -> f32 {}").unwrap();
3245        assert!(find_simd_token_param(&f.sig).is_none());
3246    }
3247
3248    #[test]
3249    fn find_simd_token_param_scalar_token_not_matched() {
3250        let f: ItemFn =
3251            syn::parse_str("fn process(token: ScalarToken, data: &[f32]) -> f32 {}").unwrap();
3252        assert!(find_simd_token_param(&f.sig).is_none());
3253    }
3254
3255    #[test]
3256    fn find_simd_token_param_among_many() {
3257        let f: ItemFn = syn::parse_str(
3258            "fn process(a: i32, b: f64, token: SimdToken, c: &str, d: bool) -> f32 {}",
3259        )
3260        .unwrap();
3261        let param = find_simd_token_param(&f.sig).unwrap();
3262        assert_eq!(param.index, 2);
3263        assert_eq!(param.ident, "token");
3264    }
3265
3266    #[test]
3267    fn find_simd_token_param_with_generics() {
3268        let f: ItemFn =
3269            syn::parse_str("fn process<T: Clone>(token: SimdToken, data: &[T]) -> T {}").unwrap();
3270        let param = find_simd_token_param(&f.sig).unwrap();
3271        assert_eq!(param.index, 0);
3272        assert_eq!(param.ident, "token");
3273    }
3274
3275    #[test]
3276    fn find_simd_token_param_with_where_clause() {
3277        let f: ItemFn = syn::parse_str(
3278            "fn process<T>(token: SimdToken, data: &[T]) -> T where T: Copy + Default {}",
3279        )
3280        .unwrap();
3281        let param = find_simd_token_param(&f.sig).unwrap();
3282        assert_eq!(param.index, 0);
3283    }
3284
3285    #[test]
3286    fn find_simd_token_param_with_lifetime() {
3287        let f: ItemFn =
3288            syn::parse_str("fn process<'a>(token: SimdToken, data: &'a [f32]) -> &'a f32 {}")
3289                .unwrap();
3290        let param = find_simd_token_param(&f.sig).unwrap();
3291        assert_eq!(param.index, 0);
3292    }
3293
3294    // =========================================================================
3295    // autoversion — tier resolution
3296    // =========================================================================
3297
3298    #[test]
3299    fn autoversion_default_tiers_all_resolve() {
3300        let names: Vec<String> = DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect();
3301        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site()).unwrap();
3302        assert!(!tiers.is_empty());
3303        // scalar should be present
3304        assert!(tiers.iter().any(|t| t.name == "scalar"));
3305    }
3306
3307    #[test]
3308    fn autoversion_scalar_always_appended() {
3309        let names = vec!["v3".to_string(), "neon".to_string()];
3310        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site()).unwrap();
3311        assert!(
3312            tiers.iter().any(|t| t.name == "scalar"),
3313            "scalar must be auto-appended"
3314        );
3315    }
3316
3317    #[test]
3318    fn autoversion_scalar_not_duplicated() {
3319        let names = vec!["v3".to_string(), "scalar".to_string()];
3320        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site()).unwrap();
3321        let scalar_count = tiers.iter().filter(|t| t.name == "scalar").count();
3322        assert_eq!(scalar_count, 1, "scalar must not be duplicated");
3323    }
3324
3325    #[test]
3326    fn autoversion_tiers_sorted_by_priority() {
3327        let names = vec!["neon".to_string(), "v4".to_string(), "v3".to_string()];
3328        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site()).unwrap();
3329        // v4 (priority 40) > v3 (30) > neon (20) > scalar (0)
3330        let priorities: Vec<u32> = tiers.iter().map(|t| t.priority).collect();
3331        for window in priorities.windows(2) {
3332            assert!(
3333                window[0] >= window[1],
3334                "Tiers not sorted by priority: {:?}",
3335                priorities
3336            );
3337        }
3338    }
3339
3340    #[test]
3341    fn autoversion_unknown_tier_errors() {
3342        let names = vec!["v3".to_string(), "avx9000".to_string()];
3343        let result = resolve_tiers(&names, proc_macro2::Span::call_site());
3344        match result {
3345            Ok(_) => panic!("Expected error for unknown tier 'avx9000'"),
3346            Err(e) => {
3347                let err_msg = e.to_string();
3348                assert!(
3349                    err_msg.contains("avx9000"),
3350                    "Error should mention unknown tier: {}",
3351                    err_msg
3352                );
3353            }
3354        }
3355    }
3356
3357    #[test]
3358    fn autoversion_all_known_tiers_resolve() {
3359        // Every tier in ALL_TIERS should be findable
3360        for tier in ALL_TIERS {
3361            assert!(
3362                find_tier(tier.name).is_some(),
3363                "Tier '{}' should be findable by name",
3364                tier.name
3365            );
3366        }
3367    }
3368
3369    #[test]
3370    fn autoversion_default_tier_list_is_sensible() {
3371        // Defaults should cover x86, ARM, WASM, and scalar
3372        let names: Vec<String> = DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect();
3373        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site()).unwrap();
3374
3375        let has_x86 = tiers.iter().any(|t| t.target_arch == Some("x86_64"));
3376        let has_arm = tiers.iter().any(|t| t.target_arch == Some("aarch64"));
3377        let has_wasm = tiers.iter().any(|t| t.target_arch == Some("wasm32"));
3378        let has_scalar = tiers.iter().any(|t| t.name == "scalar");
3379
3380        assert!(has_x86, "Default tiers should include an x86_64 tier");
3381        assert!(has_arm, "Default tiers should include an aarch64 tier");
3382        assert!(has_wasm, "Default tiers should include a wasm32 tier");
3383        assert!(has_scalar, "Default tiers should include scalar");
3384    }
3385
3386    // =========================================================================
3387    // autoversion — variant replacement (AST manipulation)
3388    // =========================================================================
3389
3390    /// Mirrors what `autoversion_impl` does for a single variant: parse an
3391    /// ItemFn (for test convenience), rename it, swap the SimdToken param
3392    /// type, optionally inject the `_self` preamble for scalar+self.
3393    fn do_variant_replacement(func: &str, tier_name: &str, has_self: bool) -> ItemFn {
3394        let mut f: ItemFn = syn::parse_str(func).unwrap();
3395        let fn_name = f.sig.ident.to_string();
3396
3397        let tier = find_tier(tier_name).unwrap();
3398
3399        // Rename
3400        f.sig.ident = format_ident!("{}_{}", fn_name, tier.suffix);
3401
3402        // Find and replace SimdToken param type
3403        let token_idx = find_simd_token_param(&f.sig)
3404            .unwrap_or_else(|| panic!("No SimdToken param in: {}", func))
3405            .index;
3406        let concrete_type: Type = syn::parse_str(tier.token_path).unwrap();
3407        if let FnArg::Typed(pt) = &mut f.sig.inputs[token_idx] {
3408            *pt.ty = concrete_type;
3409        }
3410
3411        // Scalar + self: inject preamble
3412        if tier_name == "scalar" && has_self {
3413            let preamble: syn::Stmt = syn::parse_quote!(let _self = self;);
3414            f.block.stmts.insert(0, preamble);
3415        }
3416
3417        f
3418    }
3419
3420    #[test]
3421    fn variant_replacement_v3_renames_function() {
3422        let f = do_variant_replacement(
3423            "fn process(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3424            "v3",
3425            false,
3426        );
3427        assert_eq!(f.sig.ident, "process_v3");
3428    }
3429
3430    #[test]
3431    fn variant_replacement_v3_replaces_token_type() {
3432        let f = do_variant_replacement(
3433            "fn process(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3434            "v3",
3435            false,
3436        );
3437        let first_param_ty = match &f.sig.inputs[0] {
3438            FnArg::Typed(pt) => pt.ty.to_token_stream().to_string(),
3439            _ => panic!("Expected typed param"),
3440        };
3441        assert!(
3442            first_param_ty.contains("X64V3Token"),
3443            "Expected X64V3Token, got: {}",
3444            first_param_ty
3445        );
3446    }
3447
3448    #[test]
3449    fn variant_replacement_neon_produces_valid_fn() {
3450        let f = do_variant_replacement(
3451            "fn compute(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3452            "neon",
3453            false,
3454        );
3455        assert_eq!(f.sig.ident, "compute_neon");
3456        let first_param_ty = match &f.sig.inputs[0] {
3457            FnArg::Typed(pt) => pt.ty.to_token_stream().to_string(),
3458            _ => panic!("Expected typed param"),
3459        };
3460        assert!(
3461            first_param_ty.contains("NeonToken"),
3462            "Expected NeonToken, got: {}",
3463            first_param_ty
3464        );
3465    }
3466
3467    #[test]
3468    fn variant_replacement_wasm128_produces_valid_fn() {
3469        let f = do_variant_replacement(
3470            "fn compute(_t: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3471            "wasm128",
3472            false,
3473        );
3474        assert_eq!(f.sig.ident, "compute_wasm128");
3475    }
3476
3477    #[test]
3478    fn variant_replacement_scalar_produces_valid_fn() {
3479        let f = do_variant_replacement(
3480            "fn compute(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3481            "scalar",
3482            false,
3483        );
3484        assert_eq!(f.sig.ident, "compute_scalar");
3485        let first_param_ty = match &f.sig.inputs[0] {
3486            FnArg::Typed(pt) => pt.ty.to_token_stream().to_string(),
3487            _ => panic!("Expected typed param"),
3488        };
3489        assert!(
3490            first_param_ty.contains("ScalarToken"),
3491            "Expected ScalarToken, got: {}",
3492            first_param_ty
3493        );
3494    }
3495
3496    #[test]
3497    fn variant_replacement_v4_produces_valid_fn() {
3498        let f = do_variant_replacement(
3499            "fn transform(token: SimdToken, data: &mut [f32]) { }",
3500            "v4",
3501            false,
3502        );
3503        assert_eq!(f.sig.ident, "transform_v4");
3504        let first_param_ty = match &f.sig.inputs[0] {
3505            FnArg::Typed(pt) => pt.ty.to_token_stream().to_string(),
3506            _ => panic!("Expected typed param"),
3507        };
3508        assert!(
3509            first_param_ty.contains("X64V4Token"),
3510            "Expected X64V4Token, got: {}",
3511            first_param_ty
3512        );
3513    }
3514
3515    #[test]
3516    fn variant_replacement_v4x_produces_valid_fn() {
3517        let f = do_variant_replacement(
3518            "fn transform(token: SimdToken, data: &mut [f32]) { }",
3519            "v4x",
3520            false,
3521        );
3522        assert_eq!(f.sig.ident, "transform_v4x");
3523    }
3524
3525    #[test]
3526    fn variant_replacement_arm_v2_produces_valid_fn() {
3527        let f = do_variant_replacement(
3528            "fn transform(token: SimdToken, data: &mut [f32]) { }",
3529            "arm_v2",
3530            false,
3531        );
3532        assert_eq!(f.sig.ident, "transform_arm_v2");
3533    }
3534
3535    #[test]
3536    fn variant_replacement_preserves_generics() {
3537        let f = do_variant_replacement(
3538            "fn process<T: Copy + Default>(token: SimdToken, data: &[T]) -> T { T::default() }",
3539            "v3",
3540            false,
3541        );
3542        assert_eq!(f.sig.ident, "process_v3");
3543        // Generic params should still be present
3544        assert!(
3545            !f.sig.generics.params.is_empty(),
3546            "Generics should be preserved"
3547        );
3548    }
3549
3550    #[test]
3551    fn variant_replacement_preserves_where_clause() {
3552        let f = do_variant_replacement(
3553            "fn process<T>(token: SimdToken, data: &[T]) -> T where T: Copy + Default { T::default() }",
3554            "v3",
3555            false,
3556        );
3557        assert!(
3558            f.sig.generics.where_clause.is_some(),
3559            "Where clause should be preserved"
3560        );
3561    }
3562
3563    #[test]
3564    fn variant_replacement_preserves_return_type() {
3565        let f = do_variant_replacement(
3566            "fn process(token: SimdToken, data: &[f32]) -> Vec<f32> { vec![] }",
3567            "neon",
3568            false,
3569        );
3570        let ret = f.sig.output.to_token_stream().to_string();
3571        assert!(
3572            ret.contains("Vec"),
3573            "Return type should be preserved, got: {}",
3574            ret
3575        );
3576    }
3577
3578    #[test]
3579    fn variant_replacement_preserves_multiple_params() {
3580        let f = do_variant_replacement(
3581            "fn process(token: SimdToken, a: &[f32], b: &[f32], scale: f32) -> f32 { 0.0 }",
3582            "v3",
3583            false,
3584        );
3585        // SimdToken → X64V3Token, plus the 3 other params
3586        assert_eq!(f.sig.inputs.len(), 4);
3587    }
3588
3589    #[test]
3590    fn variant_replacement_preserves_no_return_type() {
3591        let f = do_variant_replacement(
3592            "fn transform(token: SimdToken, data: &mut [f32]) { }",
3593            "v3",
3594            false,
3595        );
3596        assert!(
3597            matches!(f.sig.output, ReturnType::Default),
3598            "No return type should remain as Default"
3599        );
3600    }
3601
3602    #[test]
3603    fn variant_replacement_preserves_lifetime_params() {
3604        let f = do_variant_replacement(
3605            "fn process<'a>(token: SimdToken, data: &'a [f32]) -> &'a [f32] { data }",
3606            "v3",
3607            false,
3608        );
3609        assert!(!f.sig.generics.params.is_empty());
3610    }
3611
3612    #[test]
3613    fn variant_replacement_scalar_self_injects_preamble() {
3614        let f = do_variant_replacement(
3615            "fn method(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3616            "scalar",
3617            true, // has_self
3618        );
3619        assert_eq!(f.sig.ident, "method_scalar");
3620
3621        // First statement should be `let _self = self;`
3622        let body_str = f.block.to_token_stream().to_string();
3623        assert!(
3624            body_str.contains("let _self = self"),
3625            "Scalar+self variant should have _self preamble, got: {}",
3626            body_str
3627        );
3628    }
3629
3630    #[test]
3631    fn variant_replacement_all_default_tiers_produce_valid_fns() {
3632        let names: Vec<String> = DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect();
3633        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site()).unwrap();
3634
3635        for tier in &tiers {
3636            let f = do_variant_replacement(
3637                "fn process(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3638                tier.name,
3639                false,
3640            );
3641            let expected_name = format!("process_{}", tier.suffix);
3642            assert_eq!(
3643                f.sig.ident.to_string(),
3644                expected_name,
3645                "Tier '{}' should produce function '{}'",
3646                tier.name,
3647                expected_name
3648            );
3649        }
3650    }
3651
3652    #[test]
3653    fn variant_replacement_all_known_tiers_produce_valid_fns() {
3654        for tier in ALL_TIERS {
3655            let f = do_variant_replacement(
3656                "fn compute(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3657                tier.name,
3658                false,
3659            );
3660            let expected_name = format!("compute_{}", tier.suffix);
3661            assert_eq!(
3662                f.sig.ident.to_string(),
3663                expected_name,
3664                "Tier '{}' should produce function '{}'",
3665                tier.name,
3666                expected_name
3667            );
3668        }
3669    }
3670
3671    #[test]
3672    fn variant_replacement_no_simdtoken_remains() {
3673        for tier in ALL_TIERS {
3674            let f = do_variant_replacement(
3675                "fn compute(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3676                tier.name,
3677                false,
3678            );
3679            let full_str = f.to_token_stream().to_string();
3680            assert!(
3681                !full_str.contains("SimdToken"),
3682                "Tier '{}' variant still contains 'SimdToken': {}",
3683                tier.name,
3684                full_str
3685            );
3686        }
3687    }
3688
3689    // =========================================================================
3690    // autoversion — cfg guard and tier descriptor properties
3691    // =========================================================================
3692
3693    #[test]
3694    fn tier_v3_targets_x86_64() {
3695        let tier = find_tier("v3").unwrap();
3696        assert_eq!(tier.target_arch, Some("x86_64"));
3697        assert_eq!(tier.cargo_feature, None);
3698    }
3699
3700    #[test]
3701    fn tier_v4_requires_avx512_feature() {
3702        let tier = find_tier("v4").unwrap();
3703        assert_eq!(tier.target_arch, Some("x86_64"));
3704        assert_eq!(tier.cargo_feature, Some("avx512"));
3705    }
3706
3707    #[test]
3708    fn tier_v4x_requires_avx512_feature() {
3709        let tier = find_tier("v4x").unwrap();
3710        assert_eq!(tier.cargo_feature, Some("avx512"));
3711    }
3712
3713    #[test]
3714    fn tier_neon_targets_aarch64() {
3715        let tier = find_tier("neon").unwrap();
3716        assert_eq!(tier.target_arch, Some("aarch64"));
3717        assert_eq!(tier.cargo_feature, None);
3718    }
3719
3720    #[test]
3721    fn tier_wasm128_targets_wasm32() {
3722        let tier = find_tier("wasm128").unwrap();
3723        assert_eq!(tier.target_arch, Some("wasm32"));
3724        assert_eq!(tier.cargo_feature, None);
3725    }
3726
3727    #[test]
3728    fn tier_scalar_has_no_guards() {
3729        let tier = find_tier("scalar").unwrap();
3730        assert_eq!(tier.target_arch, None);
3731        assert_eq!(tier.cargo_feature, None);
3732        assert_eq!(tier.priority, 0);
3733    }
3734
3735    #[test]
3736    fn tier_priorities_are_consistent() {
3737        // Higher-capability tiers within the same arch should have higher priority
3738        let v2 = find_tier("v2").unwrap();
3739        let v3 = find_tier("v3").unwrap();
3740        let v4 = find_tier("v4").unwrap();
3741        assert!(v4.priority > v3.priority);
3742        assert!(v3.priority > v2.priority);
3743
3744        let neon = find_tier("neon").unwrap();
3745        let arm_v2 = find_tier("arm_v2").unwrap();
3746        let arm_v3 = find_tier("arm_v3").unwrap();
3747        assert!(arm_v3.priority > arm_v2.priority);
3748        assert!(arm_v2.priority > neon.priority);
3749
3750        // scalar is lowest
3751        let scalar = find_tier("scalar").unwrap();
3752        assert!(neon.priority > scalar.priority);
3753        assert!(v2.priority > scalar.priority);
3754    }
3755
3756    // =========================================================================
3757    // autoversion — dispatcher structure
3758    // =========================================================================
3759
3760    #[test]
3761    fn dispatcher_param_removal_free_fn() {
3762        // Simulate what autoversion_impl does: remove the SimdToken param
3763        let f: ItemFn =
3764            syn::parse_str("fn process(token: SimdToken, data: &[f32], scale: f32) -> f32 { 0.0 }")
3765                .unwrap();
3766
3767        let token_param = find_simd_token_param(&f.sig).unwrap();
3768        let mut dispatcher_inputs: Vec<FnArg> = f.sig.inputs.iter().cloned().collect();
3769        dispatcher_inputs.remove(token_param.index);
3770
3771        // Should have 2 params remaining: data, scale
3772        assert_eq!(dispatcher_inputs.len(), 2);
3773
3774        // Neither should be SimdToken
3775        for arg in &dispatcher_inputs {
3776            if let FnArg::Typed(pt) = arg {
3777                let ty_str = pt.ty.to_token_stream().to_string();
3778                assert!(
3779                    !ty_str.contains("SimdToken"),
3780                    "SimdToken should be removed from dispatcher, found: {}",
3781                    ty_str
3782                );
3783            }
3784        }
3785    }
3786
3787    #[test]
3788    fn dispatcher_param_removal_token_only() {
3789        let f: ItemFn = syn::parse_str("fn process(token: SimdToken) -> f32 { 0.0 }").unwrap();
3790
3791        let token_param = find_simd_token_param(&f.sig).unwrap();
3792        let mut dispatcher_inputs: Vec<FnArg> = f.sig.inputs.iter().cloned().collect();
3793        dispatcher_inputs.remove(token_param.index);
3794
3795        // No params left — dispatcher takes no arguments
3796        assert_eq!(dispatcher_inputs.len(), 0);
3797    }
3798
3799    #[test]
3800    fn dispatcher_param_removal_token_last() {
3801        let f: ItemFn =
3802            syn::parse_str("fn process(data: &[f32], scale: f32, token: SimdToken) -> f32 { 0.0 }")
3803                .unwrap();
3804
3805        let token_param = find_simd_token_param(&f.sig).unwrap();
3806        assert_eq!(token_param.index, 2);
3807
3808        let mut dispatcher_inputs: Vec<FnArg> = f.sig.inputs.iter().cloned().collect();
3809        dispatcher_inputs.remove(token_param.index);
3810
3811        assert_eq!(dispatcher_inputs.len(), 2);
3812    }
3813
3814    #[test]
3815    fn dispatcher_dispatch_args_extraction() {
3816        // Test that we correctly extract idents for the dispatch call
3817        let f: ItemFn =
3818            syn::parse_str("fn process(data: &[f32], scale: f32) -> f32 { 0.0 }").unwrap();
3819
3820        let dispatch_args: Vec<String> = f
3821            .sig
3822            .inputs
3823            .iter()
3824            .filter_map(|arg| {
3825                if let FnArg::Typed(PatType { pat, .. }) = arg {
3826                    if let syn::Pat::Ident(pi) = pat.as_ref() {
3827                        return Some(pi.ident.to_string());
3828                    }
3829                }
3830                None
3831            })
3832            .collect();
3833
3834        assert_eq!(dispatch_args, vec!["data", "scale"]);
3835    }
3836
3837    #[test]
3838    fn dispatcher_wildcard_params_get_renamed() {
3839        let f: ItemFn = syn::parse_str("fn process(_: &[f32], _: f32) -> f32 { 0.0 }").unwrap();
3840
3841        let mut dispatcher_inputs: Vec<FnArg> = f.sig.inputs.iter().cloned().collect();
3842
3843        let mut wild_counter = 0u32;
3844        for arg in &mut dispatcher_inputs {
3845            if let FnArg::Typed(pat_type) = arg {
3846                if matches!(pat_type.pat.as_ref(), syn::Pat::Wild(_)) {
3847                    let ident = format_ident!("__autoversion_wild_{}", wild_counter);
3848                    wild_counter += 1;
3849                    *pat_type.pat = syn::Pat::Ident(syn::PatIdent {
3850                        attrs: vec![],
3851                        by_ref: None,
3852                        mutability: None,
3853                        ident,
3854                        subpat: None,
3855                    });
3856                }
3857            }
3858        }
3859
3860        // Both wildcards should be renamed
3861        assert_eq!(wild_counter, 2);
3862
3863        let names: Vec<String> = dispatcher_inputs
3864            .iter()
3865            .filter_map(|arg| {
3866                if let FnArg::Typed(PatType { pat, .. }) = arg {
3867                    if let syn::Pat::Ident(pi) = pat.as_ref() {
3868                        return Some(pi.ident.to_string());
3869                    }
3870                }
3871                None
3872            })
3873            .collect();
3874
3875        assert_eq!(names, vec!["__autoversion_wild_0", "__autoversion_wild_1"]);
3876    }
3877
3878    // =========================================================================
3879    // autoversion — suffix_path (reused in dispatch)
3880    // =========================================================================
3881
3882    #[test]
3883    fn suffix_path_simple() {
3884        let path: syn::Path = syn::parse_str("process").unwrap();
3885        let suffixed = suffix_path(&path, "v3");
3886        assert_eq!(suffixed.to_token_stream().to_string(), "process_v3");
3887    }
3888
3889    #[test]
3890    fn suffix_path_qualified() {
3891        let path: syn::Path = syn::parse_str("module::process").unwrap();
3892        let suffixed = suffix_path(&path, "neon");
3893        let s = suffixed.to_token_stream().to_string();
3894        assert!(
3895            s.contains("process_neon"),
3896            "Expected process_neon, got: {}",
3897            s
3898        );
3899    }
3900}