// archmage_macros/lib.rs
//! Proc-macros for archmage SIMD capability tokens.
//!
//! Provides the `#[arcane]` attribute (the pasted text repeated `#[arcane]`
//! as its own alias here — presumably `#[rite]` was meant; confirm against
//! the exported macro names) to make raw intrinsics safe via token proof.
use proc_macro::TokenStream;
use quote::{ToTokens, format_ident, quote, quote_spanned};
use syn::{
    Attribute, FnArg, GenericParam, Ident, PatType, Signature, Token, Type, TypeParamBound,
    parse::{Parse, ParseStream},
    parse_macro_input, parse_quote, token,
};
13
14/// A function parsed with the body left as an opaque TokenStream.
15///
16/// Only the signature is fully parsed into an AST — the body tokens are collected
17/// without building any AST nodes (no expressions, statements, or patterns parsed).
18/// This saves ~2ms per function invocation at 100 lines of code.
19#[derive(Clone)]
20struct LightFn {
21    attrs: Vec<Attribute>,
22    vis: syn::Visibility,
23    sig: Signature,
24    brace_token: token::Brace,
25    body: proc_macro2::TokenStream,
26}
27
28impl Parse for LightFn {
29    fn parse(input: ParseStream) -> syn::Result<Self> {
30        let attrs = input.call(Attribute::parse_outer)?;
31        let vis: syn::Visibility = input.parse()?;
32        let sig: Signature = input.parse()?;
33        let content;
34        let brace_token = syn::braced!(content in input);
35        let body: proc_macro2::TokenStream = content.parse()?;
36        Ok(LightFn {
37            attrs,
38            vis,
39            sig,
40            brace_token,
41            body,
42        })
43    }
44}
45
46impl ToTokens for LightFn {
47    fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
48        for attr in &self.attrs {
49            attr.to_tokens(tokens);
50        }
51        self.vis.to_tokens(tokens);
52        self.sig.to_tokens(tokens);
53        self.brace_token.surround(tokens, |tokens| {
54            self.body.to_tokens(tokens);
55        });
56    }
57}
58
59/// Filter out `#[inline]`, `#[inline(always)]`, `#[inline(never)]` from attributes.
60///
61/// Used to prevent duplicate inline attributes when the macro adds its own.
62/// Duplicate `#[inline]` is a warning that will become a hard error.
63fn filter_inline_attrs(attrs: &[Attribute]) -> Vec<&Attribute> {
64    attrs
65        .iter()
66        .filter(|attr| !attr.path().is_ident("inline"))
67        .collect()
68}
69
70/// Check if an attribute is a lint-control attribute.
71///
72/// Lint-control attributes (`#[allow(...)]`, `#[expect(...)]`, `#[deny(...)]`,
73/// `#[warn(...)]`, `#[forbid(...)]`) must be propagated to generated sibling
74/// functions so that user-applied lint suppressions work on the generated code.
75fn is_lint_attr(attr: &Attribute) -> bool {
76    let path = attr.path();
77    path.is_ident("allow")
78        || path.is_ident("expect")
79        || path.is_ident("deny")
80        || path.is_ident("warn")
81        || path.is_ident("forbid")
82}
83
84/// Extract lint-control attributes from a list of attributes.
85///
86/// Returns references to `#[allow(...)]`, `#[expect(...)]`, `#[deny(...)]`,
87/// `#[warn(...)]`, and `#[forbid(...)]` attributes. These need to be propagated
88/// to generated sibling functions so clippy/rustc lint suppressions work.
89fn filter_lint_attrs(attrs: &[Attribute]) -> Vec<&Attribute> {
90    attrs.iter().filter(|attr| is_lint_attr(attr)).collect()
91}
92
93/// Generate a cfg guard combining target_arch and an optional feature gate.
94///
95/// - `(Some("x86_64"), None)` → `#[cfg(target_arch = "x86_64")]`
96/// - `(Some("x86_64"), Some("avx512"))` → `#[cfg(all(target_arch = "x86_64", feature = "avx512"))]`
97/// - `(None, Some("avx512"))` → `#[cfg(feature = "avx512")]`
98/// - `(None, None)` → empty
99fn gen_cfg_guard(target_arch: Option<&str>, cfg_feature: Option<&str>) -> proc_macro2::TokenStream {
100    match (target_arch, cfg_feature) {
101        (Some(arch), Some(feat)) => {
102            quote! { #[cfg(all(target_arch = #arch, feature = #feat))] }
103        }
104        (Some(arch), None) => quote! { #[cfg(target_arch = #arch)] },
105        (None, Some(feat)) => quote! { #[cfg(feature = #feat)] },
106        (None, None) => quote! {},
107    }
108}
109
110/// Build a turbofish token stream from a function's generics.
111///
112/// Collects type and const generic parameters (skipping lifetimes) and returns
113/// a `::<A, B, N, M>` turbofish fragment. Returns empty tokens if there are no
114/// type/const generics to forward.
115///
116/// This is needed when the dispatcher or wrapper calls variant/sibling functions
117/// that have const generics not inferable from argument types alone.
118fn build_turbofish(generics: &syn::Generics) -> proc_macro2::TokenStream {
119    let params: Vec<proc_macro2::TokenStream> = generics
120        .params
121        .iter()
122        .filter_map(|param| match param {
123            GenericParam::Type(tp) => {
124                let ident = &tp.ident;
125                Some(quote! { #ident })
126            }
127            GenericParam::Const(cp) => {
128                let ident = &cp.ident;
129                Some(quote! { #ident })
130            }
131            GenericParam::Lifetime(_) => None,
132        })
133        .collect();
134    if params.is_empty() {
135        quote! {}
136    } else {
137        quote! { ::<#(#params),*> }
138    }
139}
140
141/// Replace all `Self` identifier tokens with a concrete type in a token stream.
142///
143/// Recurses into groups (braces, parens, brackets). Used for `#[arcane(_self = Type)]`
144/// to replace `Self` in both the return type and body without needing to parse the body.
145fn replace_self_in_tokens(
146    tokens: proc_macro2::TokenStream,
147    replacement: &Type,
148) -> proc_macro2::TokenStream {
149    let mut result = proc_macro2::TokenStream::new();
150    for tt in tokens {
151        match tt {
152            proc_macro2::TokenTree::Ident(ref ident) if ident == "Self" => {
153                result.extend(replacement.to_token_stream());
154            }
155            proc_macro2::TokenTree::Group(group) => {
156                let new_stream = replace_self_in_tokens(group.stream(), replacement);
157                let mut new_group = proc_macro2::Group::new(group.delimiter(), new_stream);
158                new_group.set_span(group.span());
159                result.extend(std::iter::once(proc_macro2::TokenTree::Group(new_group)));
160            }
161            other => {
162                result.extend(std::iter::once(other));
163            }
164        }
165    }
166    result
167}
168
169/// Arguments to the `#[arcane]` macro.
170#[derive(Default)]
171struct ArcaneArgs {
172    /// Use `#[inline(always)]` instead of `#[inline]` for the inner function.
173    /// Requires nightly Rust with `#![feature(target_feature_inline_always)]`.
174    inline_always: bool,
175    /// The concrete type to use for `self` receiver.
176    /// When specified, `self`/`&self`/`&mut self` is transformed to `_self: Type`/`&Type`/`&mut Type`.
177    /// Implies `nested = true`.
178    self_type: Option<Type>,
179    /// Generate an `unreachable!()` stub on the wrong architecture.
180    /// Default is false (cfg-out: no function emitted on wrong arch).
181    stub: bool,
182    /// Use nested inner function instead of sibling function.
183    /// Implied by `_self = Type`. Required for associated functions in impl blocks
184    /// that have no `self` receiver (the macro can't distinguish them from free functions).
185    nested: bool,
186    /// Inject `use archmage::intrinsics::{arch}::*;` (includes safe memory ops).
187    import_intrinsics: bool,
188    /// Inject `use magetypes::simd::{ns}::*;`, `use magetypes::simd::generic::*;`,
189    /// and `use magetypes::simd::backends::*;`.
190    import_magetypes: bool,
191    /// Additional cargo feature gate. When set, the generated `#[cfg(target_arch)]`
192    /// becomes `#[cfg(all(target_arch = "...", feature = "..."))]`.
193    /// Example: `#[arcane(cfg(avx512))]` → `#[cfg(all(target_arch = "x86_64", feature = "avx512"))]`
194    cfg_feature: Option<String>,
195}
196
197impl Parse for ArcaneArgs {
198    fn parse(input: ParseStream) -> syn::Result<Self> {
199        let mut args = ArcaneArgs::default();
200
201        while !input.is_empty() {
202            let ident: Ident = input.parse()?;
203            match ident.to_string().as_str() {
204                "inline_always" => args.inline_always = true,
205                "stub" => args.stub = true,
206                "nested" => args.nested = true,
207                "import_intrinsics" => args.import_intrinsics = true,
208                "import_magetypes" => args.import_magetypes = true,
209                "cfg" => {
210                    let content;
211                    syn::parenthesized!(content in input);
212                    let feat: Ident = content.parse()?;
213                    args.cfg_feature = Some(feat.to_string());
214                }
215                "_self" => {
216                    let _: Token![=] = input.parse()?;
217                    args.self_type = Some(input.parse()?);
218                }
219                other => {
220                    return Err(syn::Error::new(
221                        ident.span(),
222                        format!("unknown arcane argument: `{}`", other),
223                    ));
224                }
225            }
226            // Consume optional comma
227            if input.peek(Token![,]) {
228                let _: Token![,] = input.parse()?;
229            }
230        }
231
232        // _self = Type implies nested (inner fn needed for Self replacement)
233        if args.self_type.is_some() {
234            args.nested = true;
235        }
236
237        Ok(args)
238    }
239}
240
241// Token-to-features and trait-to-features mappings are generated from
242// token-registry.toml by xtask. Regenerate with: cargo run -p xtask -- generate
243mod generated;
244use generated::{
245    canonical_token_to_tier_suffix, tier_to_canonical_token, token_to_arch, token_to_features,
246    token_to_magetypes_namespace, trait_to_arch, trait_to_features, trait_to_magetypes_namespace,
247};
248
/// What kind of token type a parameter was declared with.
enum TokenTypeInfo {
    /// A known concrete token type (e.g., `Avx2Token`).
    Concrete(String),
    /// `impl Trait` with the bound trait names (e.g., `impl HasX64V2`).
    ImplTrait(Vec<String>),
    /// A generic type parameter name (e.g., `T`).
    Generic(String),
}
258
259/// Extract token type information from a type.
260fn extract_token_type_info(ty: &Type) -> Option<TokenTypeInfo> {
261    match ty {
262        Type::Path(type_path) => {
263            // Get the last segment of the path (e.g., "Avx2Token" from "archmage::Avx2Token")
264            type_path.path.segments.last().map(|seg| {
265                let name = seg.ident.to_string();
266                // Check if it's a known concrete token type
267                if token_to_features(&name).is_some() {
268                    TokenTypeInfo::Concrete(name)
269                } else {
270                    // Might be a generic type parameter like `T`
271                    TokenTypeInfo::Generic(name)
272                }
273            })
274        }
275        Type::Reference(type_ref) => {
276            // Handle &Token or &mut Token
277            extract_token_type_info(&type_ref.elem)
278        }
279        Type::ImplTrait(impl_trait) => {
280            // Handle `impl HasX64V2` or `impl HasX64V2 + HasNeon`
281            let traits: Vec<String> = extract_trait_names_from_bounds(&impl_trait.bounds);
282            if traits.is_empty() {
283                None
284            } else {
285                Some(TokenTypeInfo::ImplTrait(traits))
286            }
287        }
288        _ => None,
289    }
290}
291
292/// Extract trait names from type param bounds.
293fn extract_trait_names_from_bounds(
294    bounds: &syn::punctuated::Punctuated<TypeParamBound, Token![+]>,
295) -> Vec<String> {
296    bounds
297        .iter()
298        .filter_map(|bound| {
299            if let TypeParamBound::Trait(trait_bound) = bound {
300                trait_bound
301                    .path
302                    .segments
303                    .last()
304                    .map(|seg| seg.ident.to_string())
305            } else {
306                None
307            }
308        })
309        .collect()
310}
311
312/// Look up a generic type parameter in the function's generics.
313fn find_generic_bounds(sig: &Signature, type_name: &str) -> Option<Vec<String>> {
314    // Check inline bounds first (e.g., `fn foo<T: HasX64V2>(token: T)`)
315    for param in &sig.generics.params {
316        if let GenericParam::Type(type_param) = param
317            && type_param.ident == type_name
318        {
319            let traits = extract_trait_names_from_bounds(&type_param.bounds);
320            if !traits.is_empty() {
321                return Some(traits);
322            }
323        }
324    }
325
326    // Check where clause (e.g., `fn foo<T>(token: T) where T: HasX64V2`)
327    if let Some(where_clause) = &sig.generics.where_clause {
328        for predicate in &where_clause.predicates {
329            if let syn::WherePredicate::Type(pred_type) = predicate
330                && let Type::Path(type_path) = &pred_type.bounded_ty
331                && let Some(seg) = type_path.path.segments.last()
332                && seg.ident == type_name
333            {
334                let traits = extract_trait_names_from_bounds(&pred_type.bounds);
335                if !traits.is_empty() {
336                    return Some(traits);
337                }
338            }
339        }
340    }
341
342    None
343}
344
345/// Convert trait names to features, collecting all features from all traits.
346fn traits_to_features(trait_names: &[String]) -> Option<Vec<&'static str>> {
347    let mut all_features = Vec::new();
348
349    for trait_name in trait_names {
350        if let Some(features) = trait_to_features(trait_name) {
351            for &feature in features {
352                if !all_features.contains(&feature) {
353                    all_features.push(feature);
354                }
355            }
356        }
357    }
358
359    if all_features.is_empty() {
360        None
361    } else {
362        Some(all_features)
363    }
364}
365
/// Trait names that don't map to any CPU features. These are valid in the type
/// system but cannot be used as token bounds in `#[arcane]`/`#[rite]` because
/// the macros need concrete features to generate `#[target_feature]` attributes.
const FEATURELESS_TRAIT_NAMES: &[&str] = &["SimdToken", "IntoConcreteToken"];

/// Return the first name in `trait_names` that is featureless (has no CPU
/// feature mapping), if any.
fn find_featureless_trait(trait_names: &[String]) -> Option<&'static str> {
    trait_names.iter().find_map(|name| {
        FEATURELESS_TRAIT_NAMES
            .iter()
            .copied()
            .find(|&candidate| name.as_str() == candidate)
    })
}
383
384/// Diagnose why `find_token_param` failed. Returns the name of a featureless
385/// trait if the signature has a parameter bounded by one (e.g., `SimdToken`).
386fn diagnose_featureless_token(sig: &Signature) -> Option<&'static str> {
387    for arg in &sig.inputs {
388        if let FnArg::Typed(PatType { ty, .. }) = arg
389            && let Some(info) = extract_token_type_info(ty)
390        {
391            match &info {
392                TokenTypeInfo::ImplTrait(names) => {
393                    if let Some(name) = find_featureless_trait(names) {
394                        return Some(name);
395                    }
396                }
397                TokenTypeInfo::Generic(type_name) => {
398                    // Check if the type name itself is a featureless trait
399                    // (e.g., `token: SimdToken` used as a bare path)
400                    let as_vec = vec![type_name.clone()];
401                    if let Some(name) = find_featureless_trait(&as_vec) {
402                        return Some(name);
403                    }
404                    // Check generic bounds (e.g., `T: SimdToken`)
405                    if let Some(bounds) = find_generic_bounds(sig, type_name)
406                        && let Some(name) = find_featureless_trait(&bounds)
407                    {
408                        return Some(name);
409                    }
410                }
411                TokenTypeInfo::Concrete(_) => {}
412            }
413        }
414    }
415    None
416}
417
418/// Result of finding a token parameter in a function signature.
419struct TokenParamInfo {
420    /// The parameter identifier (e.g., `token`)
421    ident: Ident,
422    /// Target features to enable (e.g., `["avx2", "fma"]`)
423    features: Vec<&'static str>,
424    /// Target architecture (Some for concrete tokens, None for traits/generics)
425    target_arch: Option<&'static str>,
426    /// Concrete token type name (Some for concrete tokens, None for traits/generics)
427    token_type_name: Option<String>,
428    /// Magetypes width namespace (e.g., "v3", "neon", "wasm128")
429    magetypes_namespace: Option<&'static str>,
430}
431
432/// Resolve magetypes namespace from a list of trait names.
433/// Returns the first matching namespace found.
434fn traits_to_magetypes_namespace(trait_names: &[String]) -> Option<&'static str> {
435    for name in trait_names {
436        if let Some(ns) = trait_to_magetypes_namespace(name) {
437            return Some(ns);
438        }
439    }
440    None
441}
442
443/// Given trait bound names, return the first matching target architecture.
444fn traits_to_arch(trait_names: &[String]) -> Option<&'static str> {
445    for name in trait_names {
446        if let Some(arch) = trait_to_arch(name) {
447            return Some(arch);
448        }
449    }
450    None
451}
452
453/// Find the first token parameter in a function signature.
454fn find_token_param(sig: &Signature) -> Option<TokenParamInfo> {
455    for arg in &sig.inputs {
456        match arg {
457            FnArg::Receiver(_) => {
458                // Self receivers (self, &self, &mut self) are not yet supported.
459                // The macro creates an inner function, and Rust's inner functions
460                // cannot have `self` parameters. Supporting this would require
461                // AST rewriting to replace `self` with a regular parameter.
462                // See the module docs for the workaround.
463                continue;
464            }
465            FnArg::Typed(PatType { pat, ty, .. }) => {
466                if let Some(info) = extract_token_type_info(ty) {
467                    let (features, arch, token_name, mage_ns) = match info {
468                        TokenTypeInfo::Concrete(ref name) => {
469                            let features = token_to_features(name).map(|f| f.to_vec());
470                            let arch = token_to_arch(name);
471                            let ns = token_to_magetypes_namespace(name);
472                            (features, arch, Some(name.clone()), ns)
473                        }
474                        TokenTypeInfo::ImplTrait(ref trait_names) => {
475                            let ns = traits_to_magetypes_namespace(trait_names);
476                            let arch = traits_to_arch(trait_names);
477                            (traits_to_features(trait_names), arch, None, ns)
478                        }
479                        TokenTypeInfo::Generic(type_name) => {
480                            // Look up the generic parameter's bounds
481                            let bounds = find_generic_bounds(sig, &type_name);
482                            let features = bounds.as_ref().and_then(|t| traits_to_features(t));
483                            let ns = bounds
484                                .as_ref()
485                                .and_then(|t| traits_to_magetypes_namespace(t));
486                            let arch = bounds.as_ref().and_then(|t| traits_to_arch(t));
487                            (features, arch, None, ns)
488                        }
489                    };
490
491                    if let Some(features) = features {
492                        // Extract parameter name (or synthesize one for wildcard `_`)
493                        let ident = match pat.as_ref() {
494                            syn::Pat::Ident(pat_ident) => Some(pat_ident.ident.clone()),
495                            syn::Pat::Wild(w) => {
496                                Some(Ident::new("__archmage_token", w.underscore_token.span))
497                            }
498                            _ => None,
499                        };
500                        if let Some(ident) = ident {
501                            return Some(TokenParamInfo {
502                                ident,
503                                features,
504                                target_arch: arch,
505                                token_type_name: token_name,
506                                magetypes_namespace: mage_ns,
507                            });
508                        }
509                    }
510                }
511            }
512        }
513    }
514    None
515}
516
/// The flavor of `self` receiver a method declares, driving how the receiver
/// is transformed into a regular parameter.
enum SelfReceiver {
    /// `self` (by value/move).
    Owned,
    /// `&self` (shared reference).
    Ref,
    /// `&mut self` (mutable reference).
    RefMut,
}
526
527/// Generate import statements to prepend to a function body.
528///
529/// Returns a `TokenStream` of `use` statements based on the import flags,
530/// target architecture, and magetypes namespace.
531fn generate_imports(
532    target_arch: Option<&str>,
533    magetypes_namespace: Option<&str>,
534    import_intrinsics: bool,
535    import_magetypes: bool,
536) -> proc_macro2::TokenStream {
537    let mut imports = proc_macro2::TokenStream::new();
538
539    if import_intrinsics && let Some(arch) = target_arch {
540        let arch_ident = format_ident!("{}", arch);
541        imports.extend(quote! {
542            #[allow(unused_imports)]
543            use archmage::intrinsics::#arch_ident::*;
544        });
545        // ScalarToken or unknown arch: import_intrinsics is a no-op
546    }
547
548    if import_magetypes && let Some(ns) = magetypes_namespace {
549        let ns_ident = format_ident!("{}", ns);
550        imports.extend(quote! {
551            #[allow(unused_imports)]
552            use magetypes::simd::#ns_ident::*;
553            #[allow(unused_imports)]
554            use magetypes::simd::backends::*;
555        });
556    }
557
558    imports
559}
560
561/// Shared implementation for arcane/arcane macros.
562fn arcane_impl(mut input_fn: LightFn, macro_name: &str, args: ArcaneArgs) -> TokenStream {
563    // Check for self receiver
564    let has_self_receiver = input_fn
565        .sig
566        .inputs
567        .first()
568        .map(|arg| matches!(arg, FnArg::Receiver(_)))
569        .unwrap_or(false);
570
571    // Nested mode is required when _self = Type is used (for Self replacement in nested fn).
572    // In sibling mode, self/Self work naturally since both fns live in the same impl scope.
573    // However, if there's a self receiver in nested mode, we still need _self = Type.
574    if has_self_receiver && args.nested && args.self_type.is_none() {
575        let msg = format!(
576            "{} with self receiver in nested mode requires `_self = Type` argument.\n\
577             Example: #[{}(nested, _self = MyType)]\n\
578             Use `_self` (not `self`) in the function body to refer to self.\n\
579             \n\
580             Alternatively, remove `nested` to use sibling expansion (default), \
581             which handles self/Self naturally.",
582            macro_name, macro_name
583        );
584        return syn::Error::new_spanned(&input_fn.sig, msg)
585            .to_compile_error()
586            .into();
587    }
588
589    // Find the token parameter, its features, target arch, and token type name
590    let TokenParamInfo {
591        ident: _token_ident,
592        features,
593        target_arch,
594        token_type_name,
595        magetypes_namespace,
596    } = match find_token_param(&input_fn.sig) {
597        Some(result) => result,
598        None => {
599            // Check for specific misuse: featureless traits like SimdToken
600            if let Some(trait_name) = diagnose_featureless_token(&input_fn.sig) {
601                let msg = format!(
602                    "`{trait_name}` cannot be used as a token bound in #[{macro_name}] \
603                     because it doesn't specify any CPU features.\n\
604                     \n\
605                     #[{macro_name}] needs concrete features to generate #[target_feature]. \
606                     Use a concrete token or a feature trait:\n\
607                     \n\
608                     Concrete tokens: X64V3Token, Desktop64, NeonToken, Arm64V2Token, ...\n\
609                     Feature traits:  impl HasX64V2, impl HasNeon, impl HasArm64V3, ..."
610                );
611                return syn::Error::new_spanned(&input_fn.sig, msg)
612                    .to_compile_error()
613                    .into();
614            }
615            let msg = format!(
616                "{} requires a token parameter. Supported forms:\n\
617                 - Concrete: `token: X64V3Token`\n\
618                 - impl Trait: `token: impl HasX64V2`\n\
619                 - Generic: `fn foo<T: HasX64V2>(token: T, ...)`\n\
620                 - With self: `#[{}(_self = Type)] fn method(&self, token: impl HasNeon, ...)`",
621                macro_name, macro_name
622            );
623            return syn::Error::new_spanned(&input_fn.sig, msg)
624                .to_compile_error()
625                .into();
626        }
627    };
628
629    // Check: import_intrinsics with AVX-512 features requires the avx512 cargo feature
630    // on archmage (propagated to archmage-macros). Without it, 512-bit safe memory ops
631    // from safe_unaligned_simd are not available, and _mm512_loadu_ps etc. would resolve
632    // to the unsafe core::arch versions (taking raw pointers instead of references).
633    //
634    // We check the resolved features (not the token name) so this works uniformly for
635    // concrete tokens (X64V4Token), trait bounds (impl HasX64V4), and generics (T: HasX64V4).
636    #[cfg(not(feature = "avx512"))]
637    if args.import_intrinsics && features.iter().any(|f| f.starts_with("avx512")) {
638        let token_desc = token_type_name.as_deref().unwrap_or("an AVX-512 token");
639        let msg = format!(
640            "Using {token_desc} with `import_intrinsics` requires the `avx512` feature.\n\
641             \n\
642             Add to your Cargo.toml:\n\
643             \x20 archmage = {{ version = \"...\", features = [\"avx512\"] }}\n\
644             \n\
645             Without it, 512-bit safe memory ops (_mm512_loadu_ps etc.) are not available.\n\
646             If you only need value intrinsics (no memory ops), remove `import_intrinsics`."
647        );
648        return syn::Error::new_spanned(&input_fn.sig, msg)
649            .to_compile_error()
650            .into();
651    }
652
653    // Prepend import statements to body if requested
654    let body_imports = generate_imports(
655        target_arch,
656        magetypes_namespace,
657        args.import_intrinsics,
658        args.import_magetypes,
659    );
660    if !body_imports.is_empty() {
661        let original_body = &input_fn.body;
662        input_fn.body = quote! {
663            #body_imports
664            #original_body
665        };
666    }
667
668    // Build target_feature attributes
669    let target_feature_attrs: Vec<Attribute> = features
670        .iter()
671        .map(|feature| parse_quote!(#[target_feature(enable = #feature)]))
672        .collect();
673
674    // Rename wildcard patterns (`_: Type`) to named params so the inner/sibling call works
675    let mut wild_rename_counter = 0u32;
676    for arg in &mut input_fn.sig.inputs {
677        if let FnArg::Typed(pat_type) = arg
678            && matches!(pat_type.pat.as_ref(), syn::Pat::Wild(_))
679        {
680            let ident = format_ident!("__archmage_wild_{}", wild_rename_counter);
681            wild_rename_counter += 1;
682            *pat_type.pat = syn::Pat::Ident(syn::PatIdent {
683                attrs: vec![],
684                by_ref: None,
685                mutability: None,
686                ident,
687                subpat: None,
688            });
689        }
690    }
691
692    // Choose inline attribute based on args
693    let inline_attr: Attribute = if args.inline_always {
694        parse_quote!(#[inline(always)])
695    } else {
696        parse_quote!(#[inline])
697    };
698
699    // On wasm32, #[target_feature(enable = "simd128")] functions are safe (Rust 1.54+).
700    // The wasm validation model guarantees unsupported instructions trap deterministically,
701    // so there's no UB from feature mismatch. Skip the unsafe wrapper entirely.
702    if target_arch == Some("wasm32") {
703        return arcane_impl_wasm_safe(
704            input_fn,
705            &args,
706            token_type_name,
707            target_feature_attrs,
708            inline_attr,
709        );
710    }
711
712    if args.nested {
713        arcane_impl_nested(
714            input_fn,
715            &args,
716            target_arch,
717            token_type_name,
718            target_feature_attrs,
719            inline_attr,
720        )
721    } else {
722        arcane_impl_sibling(
723            input_fn,
724            &args,
725            target_arch,
726            token_type_name,
727            target_feature_attrs,
728            inline_attr,
729        )
730    }
731}
732
733/// WASM-safe expansion: emits rite-style output (no unsafe wrapper).
734///
735/// On wasm32, `#[target_feature(enable = "simd128")]` is safe — the wasm validation
736/// model traps deterministically on unsupported instructions, so there's no UB.
737/// We emit the function directly with `#[target_feature]` + `#[inline]`, like `#[rite]`.
738///
739/// If `_self = Type` is set, we inject `let _self = self;` at the top of the body
740/// (the function stays in impl scope, so `Self` resolves naturally — no replacement needed).
741fn arcane_impl_wasm_safe(
742    input_fn: LightFn,
743    args: &ArcaneArgs,
744    token_type_name: Option<String>,
745    target_feature_attrs: Vec<Attribute>,
746    inline_attr: Attribute,
747) -> TokenStream {
748    let vis = &input_fn.vis;
749    let sig = &input_fn.sig;
750    let fn_name = &sig.ident;
751    let attrs = &input_fn.attrs;
752
753    let token_type_str = token_type_name.as_deref().unwrap_or("UnknownToken");
754
755    // If _self = Type is set, inject `let _self = self;` at top of body so user code
756    // referencing `_self` works. The function remains in impl scope, so `Self` resolves
757    // naturally — no Self replacement needed (unlike nested mode's inner fn).
758    let body = if args.self_type.is_some() {
759        let original_body = &input_fn.body;
760        quote! {
761            let _self = self;
762            #original_body
763        }
764    } else {
765        input_fn.body.clone()
766    };
767
768    // Prepend target_feature + inline attrs, filtering user #[inline] to avoid duplicates
769    let mut new_attrs = target_feature_attrs;
770    new_attrs.push(inline_attr);
771    for attr in filter_inline_attrs(attrs) {
772        new_attrs.push(attr.clone());
773    }
774
775    let stub = if args.stub {
776        // Build stub args for suppressing unused-variable warnings
777        let stub_args: Vec<proc_macro2::TokenStream> = sig
778            .inputs
779            .iter()
780            .filter_map(|arg| match arg {
781                FnArg::Typed(pat_type) => {
782                    if let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref() {
783                        let ident = &pat_ident.ident;
784                        Some(quote!(#ident))
785                    } else {
786                        None
787                    }
788                }
789                FnArg::Receiver(_) => None,
790            })
791            .collect();
792
793        quote! {
794            #[cfg(not(target_arch = "wasm32"))]
795            #vis #sig {
796                let _ = (#(#stub_args),*);
797                unreachable!(
798                    "BUG: {}() was called but requires {} (target_arch = \"wasm32\"). \
799                     {}::summon() returns None on this architecture, so this function \
800                     is unreachable in safe code. If you used forge_token_dangerously(), \
801                     that is the bug.",
802                    stringify!(#fn_name),
803                    #token_type_str,
804                    #token_type_str,
805                )
806            }
807        }
808    } else {
809        quote! {}
810    };
811
812    let expanded = quote! {
813        #[cfg(target_arch = "wasm32")]
814        #(#new_attrs)*
815        #vis #sig {
816            #body
817        }
818
819        #stub
820    };
821
822    expanded.into()
823}
824
825/// Sibling expansion (default): generates two functions at the same scope level.
826///
827/// ```ignore
828/// // #[arcane] fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { body }
829/// // expands to:
830/// #[cfg(target_arch = "x86_64")]
831/// #[doc(hidden)]
832/// #[target_feature(enable = "avx2,fma,...")]
833/// #[inline]
834/// fn __arcane_process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { body }
835///
836/// #[cfg(target_arch = "x86_64")]
837/// fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] {
838///     unsafe { __arcane_process(token, data) }
839/// }
840/// ```
841///
842/// The sibling function is safe (Rust 2024 edition allows safe `#[target_feature]`
843/// functions). Only the call from the wrapper needs `unsafe` because the wrapper
844/// lacks matching target features. Compatible with `#![forbid(unsafe_code)]`.
845///
846/// Self/self work naturally since both functions live in the same impl scope.
847fn arcane_impl_sibling(
848    input_fn: LightFn,
849    args: &ArcaneArgs,
850    target_arch: Option<&str>,
851    token_type_name: Option<String>,
852    target_feature_attrs: Vec<Attribute>,
853    inline_attr: Attribute,
854) -> TokenStream {
855    let vis = &input_fn.vis;
856    let sig = &input_fn.sig;
857    let fn_name = &sig.ident;
858    let generics = &sig.generics;
859    let where_clause = &generics.where_clause;
860    let inputs = &sig.inputs;
861    let output = &sig.output;
862    let body = &input_fn.body;
863    // Filter out user #[inline] attrs to avoid duplicates (will become a hard error).
864    // The wrapper gets #[inline(always)] unconditionally — it's a trivial unsafe { sibling() }.
865    let attrs = filter_inline_attrs(&input_fn.attrs);
866    // Lint-control attrs (#[allow(...)], #[expect(...)], etc.) must also go on the sibling,
867    // because the sibling has the same parameters and clippy lints it independently.
868    let lint_attrs = filter_lint_attrs(&input_fn.attrs);
869
870    let sibling_name = format_ident!("__arcane_{}", fn_name);
871
872    // Detect self receiver
873    let has_self_receiver = inputs
874        .first()
875        .map(|arg| matches!(arg, FnArg::Receiver(_)))
876        .unwrap_or(false);
877
878    // Build sibling signature: same as original but with sibling name, #[doc(hidden)]
879    // NOT unsafe — Rust 2024 edition allows safe #[target_feature] functions.
880    // Only the call from non-matching context (the wrapper) needs unsafe.
881    let sibling_sig_inputs = inputs;
882
883    // Build turbofish for forwarding type/const generic params to sibling
884    let turbofish = build_turbofish(generics);
885
886    // Build the call from wrapper to sibling
887    let sibling_call = if has_self_receiver {
888        // Method: self.__arcane_fn::<T, N>(other_args...)
889        let other_args: Vec<proc_macro2::TokenStream> = inputs
890            .iter()
891            .skip(1) // skip self receiver
892            .filter_map(|arg| {
893                if let FnArg::Typed(pat_type) = arg
894                    && let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref()
895                {
896                    let ident = &pat_ident.ident;
897                    Some(quote!(#ident))
898                } else {
899                    None
900                }
901            })
902            .collect();
903        quote! { self.#sibling_name #turbofish(#(#other_args),*) }
904    } else {
905        // Free function: __arcane_fn::<T, N>(all_args...)
906        let all_args: Vec<proc_macro2::TokenStream> = inputs
907            .iter()
908            .filter_map(|arg| {
909                if let FnArg::Typed(pat_type) = arg
910                    && let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref()
911                {
912                    let ident = &pat_ident.ident;
913                    Some(quote!(#ident))
914                } else {
915                    None
916                }
917            })
918            .collect();
919        quote! { #sibling_name #turbofish(#(#all_args),*) }
920    };
921
922    // Build stub args for suppressing unused warnings
923    let stub_args: Vec<proc_macro2::TokenStream> = inputs
924        .iter()
925        .filter_map(|arg| match arg {
926            FnArg::Typed(pat_type) => {
927                if let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref() {
928                    let ident = &pat_ident.ident;
929                    Some(quote!(#ident))
930                } else {
931                    None
932                }
933            }
934            FnArg::Receiver(_) => None, // self doesn't need _ = suppression
935        })
936        .collect();
937
938    let token_type_str = token_type_name.as_deref().unwrap_or("UnknownToken");
939
940    let cfg_guard = gen_cfg_guard(target_arch, args.cfg_feature.as_deref());
941
942    let expanded = if target_arch.is_some() {
943        // Sibling function: #[doc(hidden)] #[target_feature] fn __arcane_fn(...)
944        // Always private — only the wrapper is user-visible.
945        // Safe declaration — Rust 2024 allows safe #[target_feature] functions.
946        let sibling_fn = quote! {
947            #cfg_guard
948            #[doc(hidden)]
949            #(#lint_attrs)*
950            #(#target_feature_attrs)*
951            #inline_attr
952            fn #sibling_name #generics (#sibling_sig_inputs) #output #where_clause {
953                #body
954            }
955        };
956
957        // Wrapper function: fn original_name(...) { unsafe { sibling_call } }
958        // The unsafe block is needed because the sibling has #[target_feature] and
959        // the wrapper doesn't — calling across this boundary requires unsafe.
960        let wrapper_fn = quote! {
961            #cfg_guard
962            #(#attrs)*
963            #[inline(always)]
964            #vis #sig {
965                // SAFETY: The token parameter proves the required CPU features are available.
966                // Calling a #[target_feature] function from a non-matching context requires
967                // unsafe because the CPU may not support those instructions. The token's
968                // existence proves summon() succeeded, so the features are available.
969                unsafe { #sibling_call }
970            }
971        };
972
973        // Optional stub for other architectures / missing feature
974        let stub = if args.stub {
975            let arch_str = target_arch.unwrap_or("unknown");
976            // Negate the cfg guard used for the real implementation
977            let not_cfg = match (target_arch, args.cfg_feature.as_deref()) {
978                (Some(arch), Some(feat)) => {
979                    quote! { #[cfg(not(all(target_arch = #arch, feature = #feat)))] }
980                }
981                (Some(arch), None) => quote! { #[cfg(not(target_arch = #arch))] },
982                _ => quote! {},
983            };
984            quote! {
985                #not_cfg
986                #(#attrs)*
987                #vis #sig {
988                    let _ = (#(#stub_args),*);
989                    unreachable!(
990                        "BUG: {}() was called but requires {} (target_arch = \"{}\"). \
991                         {}::summon() returns None on this architecture, so this function \
992                         is unreachable in safe code. If you used forge_token_dangerously(), \
993                         that is the bug.",
994                        stringify!(#fn_name),
995                        #token_type_str,
996                        #arch_str,
997                        #token_type_str,
998                    )
999                }
1000            }
1001        } else {
1002            quote! {}
1003        };
1004
1005        quote! {
1006            #sibling_fn
1007            #wrapper_fn
1008            #stub
1009        }
1010    } else {
1011        // No specific arch (trait bounds or generic) - no cfg guards, no stub needed.
1012        // Still use sibling pattern for consistency. Sibling is always private.
1013        let sibling_fn = quote! {
1014            #[doc(hidden)]
1015            #(#lint_attrs)*
1016            #(#target_feature_attrs)*
1017            #inline_attr
1018            fn #sibling_name #generics (#sibling_sig_inputs) #output #where_clause {
1019                #body
1020            }
1021        };
1022
1023        let wrapper_fn = quote! {
1024            #(#attrs)*
1025            #[inline(always)]
1026            #vis #sig {
1027                // SAFETY: The token proves the required CPU features are available.
1028                unsafe { #sibling_call }
1029            }
1030        };
1031
1032        quote! {
1033            #sibling_fn
1034            #wrapper_fn
1035        }
1036    };
1037
1038    expanded.into()
1039}
1040
1041/// Nested inner function expansion (opt-in via `nested` or `_self = Type`).
1042///
1043/// This is the original approach: generates a nested inner function inside the
1044/// original function. Required when `_self = Type` is used because Self must be
1045/// replaced in the nested function (where it's not in scope).
1046fn arcane_impl_nested(
1047    input_fn: LightFn,
1048    args: &ArcaneArgs,
1049    target_arch: Option<&str>,
1050    token_type_name: Option<String>,
1051    target_feature_attrs: Vec<Attribute>,
1052    inline_attr: Attribute,
1053) -> TokenStream {
1054    let vis = &input_fn.vis;
1055    let sig = &input_fn.sig;
1056    let fn_name = &sig.ident;
1057    let generics = &sig.generics;
1058    let where_clause = &generics.where_clause;
1059    let inputs = &sig.inputs;
1060    let output = &sig.output;
1061    let body = &input_fn.body;
1062    // Filter out user #[inline] attrs to avoid duplicates (will become a hard error).
1063    let attrs = filter_inline_attrs(&input_fn.attrs);
1064    // Propagate lint attrs to inner function (same issue as sibling mode — #17)
1065    let lint_attrs = filter_lint_attrs(&input_fn.attrs);
1066
1067    // Determine self receiver type if present
1068    let self_receiver_kind: Option<SelfReceiver> = inputs.first().and_then(|arg| match arg {
1069        FnArg::Receiver(receiver) => {
1070            if receiver.reference.is_none() {
1071                Some(SelfReceiver::Owned)
1072            } else if receiver.mutability.is_some() {
1073                Some(SelfReceiver::RefMut)
1074            } else {
1075                Some(SelfReceiver::Ref)
1076            }
1077        }
1078        _ => None,
1079    });
1080
1081    // Build inner function parameters, transforming self if needed.
1082    // Also replace Self in non-self parameter types when _self = Type is set,
1083    // since the inner function is a nested fn where Self from the impl is not in scope.
1084    let inner_params: Vec<proc_macro2::TokenStream> = inputs
1085        .iter()
1086        .map(|arg| match arg {
1087            FnArg::Receiver(_) => {
1088                // Transform self receiver to _self parameter
1089                let self_ty = args.self_type.as_ref().unwrap();
1090                match self_receiver_kind.as_ref().unwrap() {
1091                    SelfReceiver::Owned => quote!(_self: #self_ty),
1092                    SelfReceiver::Ref => quote!(_self: &#self_ty),
1093                    SelfReceiver::RefMut => quote!(_self: &mut #self_ty),
1094                }
1095            }
1096            FnArg::Typed(pat_type) => {
1097                if let Some(ref self_ty) = args.self_type {
1098                    replace_self_in_tokens(quote!(#pat_type), self_ty)
1099                } else {
1100                    quote!(#pat_type)
1101                }
1102            }
1103        })
1104        .collect();
1105
1106    // Build inner function call arguments
1107    let inner_args: Vec<proc_macro2::TokenStream> = inputs
1108        .iter()
1109        .filter_map(|arg| match arg {
1110            FnArg::Typed(pat_type) => {
1111                if let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref() {
1112                    let ident = &pat_ident.ident;
1113                    Some(quote!(#ident))
1114                } else {
1115                    None
1116                }
1117            }
1118            FnArg::Receiver(_) => Some(quote!(self)), // Pass self to inner as _self
1119        })
1120        .collect();
1121
1122    let inner_fn_name = format_ident!("__simd_inner_{}", fn_name);
1123
1124    // Build turbofish for forwarding type/const generic params to inner function
1125    let turbofish = build_turbofish(generics);
1126
1127    // Transform output, body, and where clause to replace Self with concrete type if needed.
1128    let (inner_output, inner_body, inner_where_clause): (
1129        proc_macro2::TokenStream,
1130        proc_macro2::TokenStream,
1131        proc_macro2::TokenStream,
1132    ) = if let Some(ref self_ty) = args.self_type {
1133        let transformed_output = replace_self_in_tokens(output.to_token_stream(), self_ty);
1134        let transformed_body = replace_self_in_tokens(body.clone(), self_ty);
1135        let transformed_where = where_clause
1136            .as_ref()
1137            .map(|wc| replace_self_in_tokens(wc.to_token_stream(), self_ty))
1138            .unwrap_or_default();
1139        (transformed_output, transformed_body, transformed_where)
1140    } else {
1141        (
1142            output.to_token_stream(),
1143            body.clone(),
1144            where_clause
1145                .as_ref()
1146                .map(|wc| wc.to_token_stream())
1147                .unwrap_or_default(),
1148        )
1149    };
1150
1151    let token_type_str = token_type_name.as_deref().unwrap_or("UnknownToken");
1152    let cfg_guard = gen_cfg_guard(target_arch, args.cfg_feature.as_deref());
1153
1154    let expanded = if target_arch.is_some() {
1155        let stub = if args.stub {
1156            let arch_str = target_arch.unwrap_or("unknown");
1157            let not_cfg = match (target_arch, args.cfg_feature.as_deref()) {
1158                (Some(arch), Some(feat)) => {
1159                    quote! { #[cfg(not(all(target_arch = #arch, feature = #feat)))] }
1160                }
1161                (Some(arch), None) => quote! { #[cfg(not(target_arch = #arch))] },
1162                _ => quote! {},
1163            };
1164            quote! {
1165                #not_cfg
1166                #(#attrs)*
1167                #vis #sig {
1168                    let _ = (#(#inner_args),*);
1169                    unreachable!(
1170                        "BUG: {}() was called but requires {} (target_arch = \"{}\"). \
1171                         {}::summon() returns None on this architecture, so this function \
1172                         is unreachable in safe code. If you used forge_token_dangerously(), \
1173                         that is the bug.",
1174                        stringify!(#fn_name),
1175                        #token_type_str,
1176                        #arch_str,
1177                        #token_type_str,
1178                    )
1179                }
1180            }
1181        } else {
1182            quote! {}
1183        };
1184
1185        quote! {
1186            // Real implementation for the correct architecture
1187            #cfg_guard
1188            #(#attrs)*
1189            #[inline(always)]
1190            #vis #sig {
1191                #(#target_feature_attrs)*
1192                #inline_attr
1193                #(#lint_attrs)*
1194                fn #inner_fn_name #generics (#(#inner_params),*) #inner_output #inner_where_clause {
1195                    #inner_body
1196                }
1197
1198                // SAFETY: The token parameter proves the required CPU features are available.
1199                unsafe { #inner_fn_name #turbofish(#(#inner_args),*) }
1200            }
1201
1202            #stub
1203        }
1204    } else {
1205        // No specific arch (trait bounds or generic) - generate without cfg guards
1206        quote! {
1207            #(#attrs)*
1208            #[inline(always)]
1209            #vis #sig {
1210                #(#target_feature_attrs)*
1211                #inline_attr
1212                #(#lint_attrs)*
1213                fn #inner_fn_name #generics (#(#inner_params),*) #inner_output #inner_where_clause {
1214                    #inner_body
1215                }
1216
1217                // SAFETY: The token proves the required CPU features are available.
1218                unsafe { #inner_fn_name #turbofish(#(#inner_args),*) }
1219            }
1220        }
1221    };
1222
1223    expanded.into()
1224}
1225
1226/// Mark a function as an arcane SIMD function.
1227///
1228/// This macro generates a safe wrapper around a `#[target_feature]` function.
1229/// The token parameter type determines which CPU features are enabled.
1230///
1231/// # Expansion Modes
1232///
1233/// ## Sibling (default)
1234///
1235/// Generates two functions at the same scope: a safe `#[target_feature]` sibling
1236/// and a safe wrapper. `self`/`Self` work naturally since both functions share scope.
1237/// Compatible with `#![forbid(unsafe_code)]`.
1238///
1239/// ```ignore
1240/// #[arcane]
1241/// fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { /* body */ }
1242/// // Expands to (x86_64 only):
1243/// #[cfg(target_arch = "x86_64")]
1244/// #[doc(hidden)]
1245/// #[target_feature(enable = "avx2,fma,...")]
1246/// fn __arcane_process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { /* body */ }
1247///
1248/// #[cfg(target_arch = "x86_64")]
1249/// fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] {
1250///     unsafe { __arcane_process(token, data) }
1251/// }
1252/// ```
1253///
1254/// Methods work naturally:
1255///
1256/// ```ignore
1257/// impl MyType {
1258///     #[arcane]
1259///     fn compute(&self, token: X64V3Token) -> f32 {
1260///         self.data.iter().sum()  // self/Self just work!
1261///     }
1262/// }
1263/// ```
1264///
1265/// ## Nested (`nested` or `_self = Type`)
1266///
1267/// Generates a nested inner function inside the original. Required for trait impls
1268/// (where sibling functions would fail) and when `_self = Type` is used.
1269///
1270/// ```ignore
1271/// impl SimdOps for MyType {
1272///     #[arcane(_self = MyType)]
1273///     fn compute(&self, token: X64V3Token) -> Self {
1274///         // Use _self instead of self, Self replaced with MyType
1275///         _self.data.iter().sum()
1276///     }
1277/// }
1278/// ```
1279///
1280/// # Cross-Architecture Behavior
1281///
1282/// **Default (cfg-out):** On the wrong architecture, the function is not emitted
1283/// at all — no stub, no dead code. Code that references it must be cfg-gated.
1284///
1285/// **With `stub`:** Generates an `unreachable!()` stub on wrong architectures.
1286/// Use when cross-arch dispatch references the function without cfg guards.
1287///
1288/// ```ignore
1289/// #[arcane(stub)]  // generates stub on wrong arch
1290/// fn process_neon(token: NeonToken, data: &[f32]) -> f32 { ... }
1291/// ```
1292///
1293/// `incant!` is unaffected — it already cfg-gates dispatch calls by architecture.
1294///
1295/// # Token Parameter Forms
1296///
1297/// ```ignore
1298/// // Concrete token
1299/// #[arcane]
1300/// fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { ... }
1301///
1302/// // impl Trait bound
1303/// #[arcane]
1304/// fn process(token: impl HasX64V2, data: &[f32; 8]) -> [f32; 8] { ... }
1305///
1306/// // Generic with inline or where-clause bounds
1307/// #[arcane]
1308/// fn process<T: HasX64V2>(token: T, data: &[f32; 8]) -> [f32; 8] { ... }
1309///
1310/// // Wildcard
1311/// #[arcane]
1312/// fn process(_: X64V3Token, data: &[f32; 8]) -> [f32; 8] { ... }
1313/// ```
1314///
1315/// # Options
1316///
1317/// | Option | Effect |
1318/// |--------|--------|
1319/// | `stub` | Generate `unreachable!()` stub on wrong architecture |
1320/// | `nested` | Use nested inner function instead of sibling |
1321/// | `_self = Type` | Implies `nested`, transforms self receiver, replaces Self |
1322/// | `inline_always` | Use `#[inline(always)]` (requires nightly) |
1323/// | `import_intrinsics` | Auto-import `archmage::intrinsics::{arch}::*` (includes safe memory ops) |
1324/// | `import_magetypes` | Auto-import `magetypes::simd::{ns}::*` and `magetypes::simd::backends::*` |
1325///
1326/// ## Auto-Imports
1327///
1328/// `import_intrinsics` and `import_magetypes` inject `use` statements into the
1329/// function body, eliminating boilerplate. The macro derives the architecture and
1330/// namespace from the token type:
1331///
1332/// ```ignore
1333/// // Without auto-imports — lots of boilerplate:
1334/// use std::arch::x86_64::*;
1335/// use magetypes::simd::v3::*;
1336///
1337/// #[arcane]
1338/// fn process(token: X64V3Token, data: &[f32; 8]) -> f32 {
1339///     let v = f32x8::load(token, data);
1340///     let zero = _mm256_setzero_ps();
1341///     // ...
1342/// }
1343///
1344/// // With auto-imports — clean:
1345/// #[arcane(import_intrinsics, import_magetypes)]
1346/// fn process(token: X64V3Token, data: &[f32; 8]) -> f32 {
1347///     let v = f32x8::load(token, data);
1348///     let zero = _mm256_setzero_ps();
1349///     // ...
1350/// }
1351/// ```
1352///
1353/// The namespace mapping is token-driven:
1354///
1355/// | Token | `import_intrinsics` | `import_magetypes` |
1356/// |-------|--------------------|--------------------|
1357/// | `X64V1..V3Token` | `archmage::intrinsics::x86_64::*` | `magetypes::simd::v3::*` |
1358/// | `X64V4Token` | `archmage::intrinsics::x86_64::*` | `magetypes::simd::v4::*` |
1359/// | `X64V4xToken` | `archmage::intrinsics::x86_64::*` | `magetypes::simd::v4x::*` |
1360/// | `NeonToken` / ARM | `archmage::intrinsics::aarch64::*` | `magetypes::simd::neon::*` |
1361/// | `Wasm128Token` | `archmage::intrinsics::wasm32::*` | `magetypes::simd::wasm128::*` |
1362///
1363/// Works with concrete tokens, `impl Trait` bounds, and generic parameters.
1364///
1365/// # Supported Tokens
1366///
1367/// - **x86_64**: `X64V2Token`, `X64V3Token`/`Desktop64`, `X64V4Token`/`Avx512Token`/`Server64`,
1368///   `X64V4xToken`, `Avx512Fp16Token`, `X64CryptoToken`, `X64V3CryptoToken`
1369/// - **ARM**: `NeonToken`/`Arm64`, `Arm64V2Token`, `Arm64V3Token`,
1370///   `NeonAesToken`, `NeonSha3Token`, `NeonCrcToken`
1371/// - **WASM**: `Wasm128Token`
1372///
1373/// # Supported Trait Bounds
1374///
1375/// `HasX64V2`, `HasX64V4`, `HasNeon`, `HasNeonAes`, `HasNeonSha3`, `HasArm64V2`, `HasArm64V3`
1376///
1377/// ```ignore
1378/// #![feature(target_feature_inline_always)]
1379///
1380/// #[arcane(inline_always)]
1381/// fn fast_kernel(token: Avx2Token, data: &mut [f32]) {
1382///     // Inner function will use #[inline(always)]
1383/// }
1384/// ```
1385#[proc_macro_attribute]
1386pub fn arcane(attr: TokenStream, item: TokenStream) -> TokenStream {
1387    let args = parse_macro_input!(attr as ArcaneArgs);
1388    let input_fn = parse_macro_input!(item as LightFn);
1389    arcane_impl(input_fn, "arcane", args)
1390}
1391
1392/// Legacy alias for [`arcane`].
1393///
1394/// **Deprecated:** Use `#[arcane]` instead. This alias exists only for migration.
1395#[proc_macro_attribute]
1396#[doc(hidden)]
1397pub fn simd_fn(attr: TokenStream, item: TokenStream) -> TokenStream {
1398    let args = parse_macro_input!(attr as ArcaneArgs);
1399    let input_fn = parse_macro_input!(item as LightFn);
1400    arcane_impl(input_fn, "simd_fn", args)
1401}
1402
1403/// Descriptive alias for [`arcane`].
1404///
1405/// Generates a safe wrapper around a `#[target_feature]` inner function.
1406/// The token type in your signature determines which CPU features are enabled.
1407/// Creates an LLVM optimization boundary — use [`token_target_features`]
1408/// (alias for [`rite`]) for inner helpers to avoid this.
1409///
1410/// Since Rust 1.85, value-based SIMD intrinsics are safe inside
1411/// `#[target_feature]` functions. This macro generates the `#[target_feature]`
1412/// wrapper so you never need to write `unsafe` for SIMD code.
1413///
1414/// See [`arcane`] for full documentation and examples.
1415#[proc_macro_attribute]
1416pub fn token_target_features_boundary(attr: TokenStream, item: TokenStream) -> TokenStream {
1417    let args = parse_macro_input!(attr as ArcaneArgs);
1418    let input_fn = parse_macro_input!(item as LightFn);
1419    arcane_impl(input_fn, "token_target_features_boundary", args)
1420}
1421
1422// ============================================================================
1423// Rite macro for inner SIMD functions (inlines into matching #[target_feature] callers)
1424// ============================================================================
1425
1426/// Annotate inner SIMD helpers called from `#[arcane]` functions.
1427///
1428/// Unlike `#[arcane]`, which creates an inner `#[target_feature]` function behind
1429/// a safe boundary, `#[rite]` adds `#[target_feature]` and `#[inline]` directly.
1430/// LLVM inlines it into any caller with matching features — no boundary crossing.
1431///
1432/// # Three Modes
1433///
1434/// **Token-based:** Reads the token type from the function signature.
1435/// ```ignore
1436/// #[rite]
1437/// fn helper(_: X64V3Token, v: __m256) -> __m256 { _mm256_add_ps(v, v) }
1438/// ```
1439///
1440/// **Tier-based:** Specify the tier name directly, no token parameter needed.
1441/// ```ignore
1442/// #[rite(v3)]
1443/// fn helper(v: __m256) -> __m256 { _mm256_add_ps(v, v) }
1444/// ```
1445///
1446/// Both produce identical code. The token form can be easier to remember if
1447/// you already have the token in scope.
1448///
1449/// **Multi-tier:** Specify multiple tiers to generate suffixed variants.
1450/// ```ignore
1451/// #[rite(v3, v4)]
1452/// fn process(data: &[f32; 4]) -> f32 { data.iter().sum() }
1453/// // Generates: process_v3() and process_v4()
1454/// ```
1455///
1456/// Each variant gets its own `#[target_feature]` and `#[cfg(target_arch)]`.
1457/// Since Rust 1.85, calling these from a matching `#[arcane]` or `#[rite]`
1458/// context is safe — no `unsafe` needed when the caller has matching or
1459/// superset features.
1460///
1461/// # Safety
1462///
1463/// `#[rite]` functions can only be safely called from contexts where the
1464/// required CPU features are enabled:
1465/// - From within `#[arcane]` functions with matching/superset tokens
1466/// - From within other `#[rite]` functions with matching/superset tokens
1467/// - From code compiled with `-Ctarget-cpu` that enables the features
1468///
1469/// Calling from other contexts requires `unsafe` and the caller must ensure
1470/// the CPU supports the required features.
1471///
1472/// # Cross-Architecture Behavior
1473///
1474/// Like `#[arcane]`, defaults to cfg-out (no function on wrong arch).
1475/// Use `#[rite(stub)]` to generate an unreachable stub instead.
1476///
1477/// # Options
1478///
1479/// | Option | Effect |
1480/// |--------|--------|
1481/// | tier name(s) | `v3`, `neon`, etc. One = single function; multiple = suffixed variants |
1482/// | `stub` | Generate `unreachable!()` stub on wrong architecture |
1483/// | `import_intrinsics` | Auto-import `archmage::intrinsics::{arch}::*` (includes safe memory ops) |
1484/// | `import_magetypes` | Auto-import `magetypes::simd::{ns}::*` and `magetypes::simd::backends::*` |
1485///
1486/// See `#[arcane]` docs for the full namespace mapping table.
1487///
1488/// # Comparison with #[arcane]
1489///
1490/// | Aspect | `#[arcane]` | `#[rite]` |
1491/// |--------|-------------|-----------|
1492/// | Creates wrapper | Yes | No |
1493/// | Entry point | Yes | No |
1494/// | Inlines into caller | No (barrier) | Yes |
1495/// | Safe to call anywhere | Yes (with token) | Only from feature-enabled context |
1496/// | Multi-tier variants | No | Yes (`#[rite(v3, v4, neon)]`) |
1497/// | `stub` param | Yes | Yes |
1498/// | `import_intrinsics` | Yes | Yes |
1499/// | `import_magetypes` | Yes | Yes |
1500#[proc_macro_attribute]
1501pub fn rite(attr: TokenStream, item: TokenStream) -> TokenStream {
1502    let args = parse_macro_input!(attr as RiteArgs);
1503    let input_fn = parse_macro_input!(item as LightFn);
1504    rite_impl(input_fn, args)
1505}
1506
1507/// Descriptive alias for [`rite`].
1508///
1509/// Applies `#[target_feature]` + `#[inline]` based on the token type in your
1510/// function signature. No wrapper, no optimization boundary. Use for functions
1511/// called from within `#[arcane]`/`#[token_target_features_boundary]` code.
1512///
1513/// Since Rust 1.85, calling a `#[target_feature]` function from another function
1514/// with matching features is safe — no `unsafe` needed.
1515///
1516/// See [`rite`] for full documentation and examples.
1517#[proc_macro_attribute]
1518pub fn token_target_features(attr: TokenStream, item: TokenStream) -> TokenStream {
1519    let args = parse_macro_input!(attr as RiteArgs);
1520    let input_fn = parse_macro_input!(item as LightFn);
1521    rite_impl(input_fn, args)
1522}
1523
/// Arguments for the `#[rite]` macro.
#[derive(Default)]
struct RiteArgs {
    /// Explicitly requested tiers (e.g., `#[rite(v3)]` or `#[rite(v3, v4, neon)]`),
    /// stored as canonical token names (e.g., "X64V3Token").
    /// One tier → a single unsuffixed function with no token parameter required;
    /// several tiers → suffixed variants (`fn_v3`, `fn_v4`, `fn_neon`, ...).
    tier_tokens: Vec<String>,
    /// Emit an `unreachable!()` stub when compiled for the wrong architecture.
    /// When false (the default) the function is simply cfg'd out there.
    stub: bool,
    /// Inject `use archmage::intrinsics::{arch}::*;` (includes safe memory ops).
    import_intrinsics: bool,
    /// Inject `use magetypes::simd::{ns}::*;`, `use magetypes::simd::generic::*;`,
    /// and `use magetypes::simd::backends::*;`.
    import_magetypes: bool,
    /// Extra cargo-feature gate to AND into the cfg guard (mirrors arcane's `cfg_feature`).
    cfg_feature: Option<String>,
}
1543
1544impl Parse for RiteArgs {
1545    fn parse(input: ParseStream) -> syn::Result<Self> {
1546        let mut args = RiteArgs::default();
1547
1548        while !input.is_empty() {
1549            let ident: Ident = input.parse()?;
1550            match ident.to_string().as_str() {
1551                "stub" => args.stub = true,
1552                "import_intrinsics" => args.import_intrinsics = true,
1553                "import_magetypes" => args.import_magetypes = true,
1554                "cfg" => {
1555                    let content;
1556                    syn::parenthesized!(content in input);
1557                    let feat: Ident = content.parse()?;
1558                    args.cfg_feature = Some(feat.to_string());
1559                }
1560                other => {
1561                    if let Some(canonical) = tier_to_canonical_token(other) {
1562                        args.tier_tokens.push(String::from(canonical));
1563                    } else {
1564                        return Err(syn::Error::new(
1565                            ident.span(),
1566                            format!(
1567                                "unknown rite argument: `{}`. Supported: tier names \
1568                                 (v1, v2, v3, v4, neon, arm_v2, wasm128, ...), \
1569                                 `stub`, `import_intrinsics`, `import_magetypes`, `cfg(feature)`.",
1570                                other
1571                            ),
1572                        ));
1573                    }
1574                }
1575            }
1576            if input.peek(Token![,]) {
1577                let _: Token![,] = input.parse()?;
1578            }
1579        }
1580
1581        Ok(args)
1582    }
1583}
1584
1585/// Implementation for the `#[rite]` macro.
1586fn rite_impl(input_fn: LightFn, args: RiteArgs) -> TokenStream {
1587    // Multi-tier mode: generate suffixed variants for each tier
1588    if args.tier_tokens.len() > 1 {
1589        return rite_multi_tier_impl(input_fn, &args);
1590    }
1591
1592    // Single-tier or token-param mode
1593    rite_single_impl(input_fn, args)
1594}
1595
/// Generate a single `#[rite]` function (single tier or token-param mode).
///
/// Steps:
/// 1. Resolve the CPU feature list — from an explicit tier argument
///    (`#[rite(v3)]`) when present, otherwise from the function's token
///    parameter (concrete token / `impl Trait` / generic bound).
/// 2. Attach one `#[target_feature(enable = ...)]` per feature plus
///    `#[inline]`, keeping the user's remaining attributes.
/// 3. Optionally prepend intrinsics/magetypes `use` imports to the body.
/// 4. When the token pins a specific architecture, wrap the output in a
///    cfg guard and optionally emit a wrong-arch `unreachable!()` stub.
fn rite_single_impl(mut input_fn: LightFn, args: RiteArgs) -> TokenStream {
    // Resolve features: either from tier name or from token parameter
    let TokenParamInfo {
        features,
        target_arch,
        // Underscore-named but still bound: the avx512 error path below reads
        // it when this macro crate is built without the `avx512` feature.
        token_type_name: _token_type_name,
        magetypes_namespace,
        ..
    } = if let Some(tier_token) = args.tier_tokens.first() {
        // Tier specified directly (e.g., #[rite(v3)]) — no token param needed
        let features = token_to_features(tier_token)
            .expect("tier_to_canonical_token returned invalid token name")
            .to_vec();
        let target_arch = token_to_arch(tier_token);
        let magetypes_namespace = token_to_magetypes_namespace(tier_token);
        TokenParamInfo {
            // Placeholder ident — no actual token parameter exists in this mode.
            ident: Ident::new("_", proc_macro2::Span::call_site()),
            features,
            target_arch,
            token_type_name: Some(tier_token.clone()),
            magetypes_namespace,
        }
    } else {
        match find_token_param(&input_fn.sig) {
            Some(result) => result,
            None => {
                // Check for specific misuse: featureless traits like SimdToken
                if let Some(trait_name) = diagnose_featureless_token(&input_fn.sig) {
                    let msg = format!(
                        "`{trait_name}` cannot be used as a token bound in #[rite] \
                         because it doesn't specify any CPU features.\n\
                         \n\
                         #[rite] needs concrete features to generate #[target_feature]. \
                         Use a concrete token, a feature trait, or a tier name:\n\
                         \n\
                         Concrete tokens: X64V3Token, Desktop64, NeonToken, Arm64V2Token, ...\n\
                         Feature traits:  impl HasX64V2, impl HasNeon, impl HasArm64V3, ...\n\
                         Tier names:      #[rite(v3)], #[rite(neon)], #[rite(v4)], ..."
                    );
                    return syn::Error::new_spanned(&input_fn.sig, msg)
                        .to_compile_error()
                        .into();
                }
                // Generic "no token found" diagnostic listing the accepted forms.
                let msg = "rite requires a token parameter or a tier name. Supported forms:\n\
                     - Tier name: `#[rite(v3)]`, `#[rite(neon)]`\n\
                     - Multi-tier: `#[rite(v3, v4, neon)]` (generates suffixed variants)\n\
                     - Concrete: `token: X64V3Token`\n\
                     - impl Trait: `token: impl HasX64V2`\n\
                     - Generic: `fn foo<T: HasX64V2>(token: T, ...)`";
                return syn::Error::new_spanned(&input_fn.sig, msg)
                    .to_compile_error()
                    .into();
            }
        }
    };

    // Check: import_intrinsics with AVX-512 features requires the avx512 cargo feature.
    // Check resolved features (not token name) for uniform handling of concrete/trait/generic.
    // Note: statement-level #[cfg] — this check only exists when the MACRO crate
    // itself was built without its `avx512` feature.
    #[cfg(not(feature = "avx512"))]
    if args.import_intrinsics && features.iter().any(|f| f.starts_with("avx512")) {
        let token_desc = _token_type_name.as_deref().unwrap_or("an AVX-512 token");
        let msg = format!(
            "Using {token_desc} with `import_intrinsics` requires the `avx512` feature.\n\
             \n\
             Add to your Cargo.toml:\n\
             \x20 archmage = {{ version = \"...\", features = [\"avx512\"] }}\n\
             \n\
             Without it, 512-bit safe memory ops (_mm512_loadu_ps etc.) are not available.\n\
             If you only need value intrinsics (no memory ops), remove `import_intrinsics`."
        );
        return syn::Error::new_spanned(&input_fn.sig, msg)
            .to_compile_error()
            .into();
    }

    // Build target_feature attributes
    let target_feature_attrs: Vec<Attribute> = features
        .iter()
        .map(|feature| parse_quote!(#[target_feature(enable = #feature)]))
        .collect();

    // Always use #[inline] - #[inline(always)] + #[target_feature] requires nightly
    let inline_attr: Attribute = parse_quote!(#[inline]);

    // Prepend attributes to the function, filtering user #[inline] to avoid duplicates
    let mut new_attrs = target_feature_attrs;
    new_attrs.push(inline_attr);
    for attr in filter_inline_attrs(&input_fn.attrs) {
        new_attrs.push(attr.clone());
    }
    input_fn.attrs = new_attrs;

    // Prepend import statements to body if requested
    let body_imports = generate_imports(
        target_arch,
        magetypes_namespace,
        args.import_intrinsics,
        args.import_magetypes,
    );
    if !body_imports.is_empty() {
        let original_body = &input_fn.body;
        input_fn.body = quote! {
            #body_imports
            #original_body
        };
    }

    // If we know the target arch, generate cfg-gated impl (+ optional stub)
    let cfg_guard = gen_cfg_guard(target_arch, args.cfg_feature.as_deref());
    if target_arch.is_some() {
        let vis = &input_fn.vis;
        let sig = &input_fn.sig;
        let attrs = &input_fn.attrs;
        let body = &input_fn.body;

        // Optional stub: same signature, no target_feature attrs (it never
        // executes real SIMD — it only panics on the wrong architecture).
        let stub = if args.stub {
            let not_cfg = match (target_arch, args.cfg_feature.as_deref()) {
                (Some(arch), Some(feat)) => {
                    quote! { #[cfg(not(all(target_arch = #arch, feature = #feat)))] }
                }
                (Some(arch), None) => quote! { #[cfg(not(target_arch = #arch))] },
                _ => quote! {},
            };
            quote! {
                #not_cfg
                #vis #sig {
                    unreachable!("This function requires a specific architecture and feature set")
                }
            }
        } else {
            quote! {}
        };

        quote! {
            #cfg_guard
            #(#attrs)*
            #vis #sig {
                #body
            }

            #stub
        }
        .into()
    } else {
        // No specific arch (trait bounds) - just emit the annotated function
        quote!(#input_fn).into()
    }
}
1745
/// Generate multiple suffixed `#[rite]` variants for multi-tier mode.
///
/// `#[rite(v3, v4, neon)]` on `fn process(...)` generates:
/// - `fn process_v3(...)` with `#[target_feature(enable = "avx2,fma,...")]`
/// - `fn process_v4(...)` with `#[target_feature(enable = "avx512f,...")]`
/// - `fn process_neon(...)` with `#[target_feature(enable = "neon")]`
///
/// Each variant is cfg-gated to its architecture and gets `#[inline]`.
fn rite_multi_tier_impl(input_fn: LightFn, args: &RiteArgs) -> TokenStream {
    let fn_name = &input_fn.sig.ident;
    // All generated items accumulate into one flat token stream.
    let mut variants = proc_macro2::TokenStream::new();

    for tier_token in &args.tier_tokens {
        let features = match token_to_features(tier_token) {
            Some(f) => f,
            None => {
                return syn::Error::new_spanned(
                    &input_fn.sig,
                    format!("unknown token `{tier_token}` in multi-tier #[rite]"),
                )
                .to_compile_error()
                .into();
            }
        };
        let target_arch = token_to_arch(tier_token);
        let magetypes_namespace = token_to_magetypes_namespace(tier_token);

        // Check: import_intrinsics with AVX-512 features requires the avx512 cargo feature.
        // Statement-level #[cfg]: the check exists only when this macro crate
        // itself is built without its `avx512` feature.
        #[cfg(not(feature = "avx512"))]
        if args.import_intrinsics && features.iter().any(|f| f.starts_with("avx512")) {
            let msg = format!(
                "Using {tier_token} with `import_intrinsics` requires the `avx512` feature.\n\
                 \n\
                 Add to your Cargo.toml:\n\
                 \x20 archmage = {{ version = \"...\", features = [\"avx512\"] }}\n\
                 \n\
                 Without it, 512-bit safe memory ops (_mm512_loadu_ps etc.) are not available.\n\
                 If you only need value intrinsics (no memory ops), remove `import_intrinsics`."
            );
            return syn::Error::new_spanned(&input_fn.sig, msg)
                .to_compile_error()
                .into();
        }

        let suffix = canonical_token_to_tier_suffix(tier_token)
            .expect("canonical token must have a tier suffix");

        // Build suffixed function name
        let suffixed_ident = format_ident!("{}_{}", fn_name, suffix);

        // Clone and rename the function
        let mut variant_fn = input_fn.clone();
        variant_fn.sig.ident = suffixed_ident;

        // Build target_feature attributes
        let target_feature_attrs: Vec<Attribute> = features
            .iter()
            .map(|feature| parse_quote!(#[target_feature(enable = #feature)]))
            .collect();
        // #[inline] only — #[inline(always)] + #[target_feature] requires nightly.
        let inline_attr: Attribute = parse_quote!(#[inline]);

        // User #[inline] attrs are filtered out to avoid duplicates.
        let mut new_attrs = target_feature_attrs;
        new_attrs.push(inline_attr);
        for attr in filter_inline_attrs(&variant_fn.attrs) {
            new_attrs.push(attr.clone());
        }
        variant_fn.attrs = new_attrs;

        // Prepend import statements if requested
        let body_imports = generate_imports(
            target_arch,
            magetypes_namespace,
            args.import_intrinsics,
            args.import_magetypes,
        );
        if !body_imports.is_empty() {
            let original_body = &variant_fn.body;
            variant_fn.body = quote! {
                #body_imports
                #original_body
            };
        }

        // Emit cfg-gated variant
        let variant_cfg = gen_cfg_guard(target_arch, args.cfg_feature.as_deref());
        if target_arch.is_some() {
            let vis = &variant_fn.vis;
            let sig = &variant_fn.sig;
            let attrs = &variant_fn.attrs;
            let body = &variant_fn.body;

            variants.extend(quote! {
                #variant_cfg
                #(#attrs)*
                #vis #sig {
                    #body
                }
            });

            // Optional wrong-arch stub: same signature, no target_feature
            // attrs — it only panics, never runs SIMD.
            if args.stub {
                let not_cfg = match (target_arch, args.cfg_feature.as_deref()) {
                    (Some(arch), Some(feat)) => {
                        quote! { #[cfg(not(all(target_arch = #arch, feature = #feat)))] }
                    }
                    (Some(arch), None) => quote! { #[cfg(not(target_arch = #arch))] },
                    _ => quote! {},
                };
                let arch_str = target_arch.unwrap_or("unknown");
                variants.extend(quote! {
                    #not_cfg
                    #vis #sig {
                        unreachable!(concat!(
                            "This function requires ",
                            #arch_str,
                            " architecture"
                        ))
                    }
                });
            }
        } else {
            // No specific arch — just emit the annotated function
            variants.extend(quote!(#variant_fn));
        }
    }

    variants.into()
}
1873
1874// =============================================================================
1875// magetypes! macro - generate platform variants from generic function
1876// =============================================================================
1877
1878/// Generate platform-specific variants from a function by replacing `Token`.
1879///
1880/// Use `Token` as a placeholder for the token type. The macro generates
1881/// suffixed variants with `Token` replaced by the concrete token type, and
1882/// each variant wrapped in the appropriate `#[cfg(target_arch = ...)]` guard.
1883///
1884/// # Default tiers
1885///
1886/// Without arguments, generates `_v3`, `_v4`, `_neon`, `_wasm128`, `_scalar`:
1887///
1888/// ```rust,ignore
1889/// #[magetypes]
1890/// fn process(token: Token, data: &[f32]) -> f32 {
1891///     inner_simd_work(token, data)
1892/// }
1893/// ```
1894///
1895/// # Explicit tiers
1896///
1897/// Specify which tiers to generate:
1898///
1899/// ```rust,ignore
1900/// #[magetypes(v1, v3, neon)]
1901/// fn process(token: Token, data: &[f32]) -> f32 {
1902///     inner_simd_work(token, data)
1903/// }
1904/// // Generates: process_v1, process_v3, process_neon, process_scalar
1905/// ```
1906///
1907/// `scalar` is always included implicitly.
1908///
1909/// Known tiers: `v1`, `v2`, `v3`, `v4`, `v4x`, `neon`, `neon_aes`,
1910/// `neon_sha3`, `neon_crc`, `wasm128`, `wasm128_relaxed`, `scalar`.
1911///
1912/// # What gets replaced
1913///
1914/// **Only `Token`** is replaced — with the concrete token type for each variant
1915/// (e.g., `archmage::X64V3Token`, `archmage::ScalarToken`). SIMD types like
1916/// `f32x8` and constants like `LANES` are **not** replaced by this macro.
1917///
1918/// # Usage with incant!
1919///
1920/// The generated variants work with `incant!` for dispatch:
1921///
1922/// ```rust,ignore
1923/// pub fn process_api(data: &[f32]) -> f32 {
1924///     incant!(process(data))
1925/// }
1926///
1927/// // Or with matching explicit tiers:
1928/// pub fn process_api(data: &[f32]) -> f32 {
1929///     incant!(process(data), [v1, v3, neon, scalar])
1930/// }
1931/// ```
1932#[proc_macro_attribute]
1933pub fn magetypes(attr: TokenStream, item: TokenStream) -> TokenStream {
1934    let input_fn = parse_macro_input!(item as LightFn);
1935
1936    // Parse optional tier list from attribute args: tier1, tier2(feature), ...
1937    let tier_names: Vec<String> = if attr.is_empty() {
1938        DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect()
1939    } else {
1940        match syn::parse::Parser::parse(parse_tier_names, attr) {
1941            Ok(names) => names,
1942            Err(e) => return e.to_compile_error().into(),
1943        }
1944    };
1945
1946    // default_optional: tiers with cfg_feature are optional by default
1947    let tiers = match resolve_tiers(
1948        &tier_names,
1949        input_fn.sig.ident.span(),
1950        true, // magetypes always uses default_optional for cfg_feature tiers
1951    ) {
1952        Ok(t) => t,
1953        Err(e) => return e.to_compile_error().into(),
1954    };
1955
1956    magetypes_impl(input_fn, &tiers)
1957}
1958
1959fn magetypes_impl(mut input_fn: LightFn, tiers: &[ResolvedTier]) -> TokenStream {
1960    // Strip user-provided #[arcane] / #[rite] to prevent double-wrapping
1961    // (magetypes auto-adds #[arcane] on non-scalar variants)
1962    input_fn
1963        .attrs
1964        .retain(|attr| !attr.path().is_ident("arcane") && !attr.path().is_ident("rite"));
1965
1966    let fn_name = &input_fn.sig.ident;
1967    let fn_attrs = &input_fn.attrs;
1968
1969    // Convert function to string for text substitution
1970    let fn_str = input_fn.to_token_stream().to_string();
1971
1972    let mut variants = Vec::new();
1973
1974    for tier in tiers {
1975        // Create suffixed function name
1976        let suffixed_name = format!("{}_{}", fn_name, tier.suffix);
1977
1978        // Do text substitution
1979        let mut variant_str = fn_str.clone();
1980
1981        // Replace function name
1982        variant_str = variant_str.replacen(&fn_name.to_string(), &suffixed_name, 1);
1983
1984        // Replace Token type with concrete token
1985        variant_str = variant_str.replace("Token", tier.token_path);
1986
1987        // Parse back to tokens
1988        let variant_tokens: proc_macro2::TokenStream = match variant_str.parse() {
1989            Ok(t) => t,
1990            Err(e) => {
1991                return syn::Error::new_spanned(
1992                    &input_fn,
1993                    format!(
1994                        "Failed to parse generated variant `{}`: {}",
1995                        suffixed_name, e
1996                    ),
1997                )
1998                .to_compile_error()
1999                .into();
2000            }
2001        };
2002
2003        // Add cfg guard: arch + optional feature gate
2004        let allow_attr = if tier.allow_unexpected_cfg {
2005            quote! { #[allow(unexpected_cfgs)] }
2006        } else {
2007            quote! {}
2008        };
2009        let cfg_guard = match (tier.target_arch, &tier.feature_gate) {
2010            (Some(arch), Some(feat)) => quote! {
2011                #[cfg(target_arch = #arch)]
2012                #allow_attr
2013                #[cfg(feature = #feat)]
2014            },
2015            (Some(arch), None) => quote! { #[cfg(target_arch = #arch)] },
2016            (None, Some(feat)) => quote! {
2017                #allow_attr
2018                #[cfg(feature = #feat)]
2019            },
2020            (None, None) => quote! {},
2021        };
2022
2023        variants.push(if tier.name != "scalar" {
2024            // Non-scalar variants get #[arcane] so target_feature is applied
2025            quote! {
2026                #cfg_guard
2027                #[archmage::arcane]
2028                #variant_tokens
2029            }
2030        } else {
2031            quote! {
2032                #cfg_guard
2033                #variant_tokens
2034            }
2035        });
2036    }
2037
2038    // Remove attributes from the list that should not be duplicated
2039    let filtered_attrs: Vec<_> = fn_attrs
2040        .iter()
2041        .filter(|a| !a.path().is_ident("magetypes"))
2042        .collect();
2043
2044    let output = quote! {
2045        #(#filtered_attrs)*
2046        #(#variants)*
2047    };
2048
2049    output.into()
2050}
2051
2052// =============================================================================
2053// incant! macro - dispatch to platform-specific variants
2054// =============================================================================
2055
2056// =============================================================================
2057// Tier descriptors for incant! and #[magetypes]
2058// =============================================================================
2059
/// Describes a dispatch tier for incant! and #[magetypes].
///
/// All fields are `'static` string data — descriptors live in the
/// compile-time `ALL_TIERS` table below.
struct TierDescriptor {
    /// Tier name as written in user code (e.g., "v3", "neon")
    name: &'static str,
    /// Function suffix (e.g., "v3", "neon", "scalar")
    suffix: &'static str,
    /// Token type path (e.g., "archmage::X64V3Token")
    token_path: &'static str,
    /// IntoConcreteToken method name (e.g., "as_x64v3")
    as_method: &'static str,
    /// Target architecture for cfg guard (None = no guard)
    target_arch: Option<&'static str>,
    /// Cargo feature required for this tier's functions to exist.
    /// When a tier is resolved as "optional" (e.g., `v4?` in incant! or in default
    /// tier lists), dispatch is wrapped in `#[allow(unexpected_cfgs)] #[cfg(feature = "...")]`.
    /// This checks the CALLING crate's features — matching the cfg on the function definitions.
    cfg_feature: Option<&'static str>,
    /// Dispatch priority (higher = tried first within same arch)
    priority: u32,
}
2080
2081/// All known tiers in dispatch-priority order (highest first within arch).
2082const ALL_TIERS: &[TierDescriptor] = &[
2083    // x86: highest to lowest
2084    TierDescriptor {
2085        name: "v4x",
2086        suffix: "v4x",
2087        token_path: "archmage::X64V4xToken",
2088        as_method: "as_x64v4x",
2089        target_arch: Some("x86_64"),
2090        cfg_feature: Some("avx512"),
2091        priority: 50,
2092    },
2093    TierDescriptor {
2094        name: "v4",
2095        suffix: "v4",
2096        token_path: "archmage::X64V4Token",
2097        as_method: "as_x64v4",
2098        target_arch: Some("x86_64"),
2099        cfg_feature: Some("avx512"),
2100        priority: 40,
2101    },
2102    TierDescriptor {
2103        name: "v3_crypto",
2104        suffix: "v3_crypto",
2105        token_path: "archmage::X64V3CryptoToken",
2106        as_method: "as_x64v3_crypto",
2107        target_arch: Some("x86_64"),
2108
2109        cfg_feature: None,
2110        priority: 35,
2111    },
2112    TierDescriptor {
2113        name: "v3",
2114        suffix: "v3",
2115        token_path: "archmage::X64V3Token",
2116        as_method: "as_x64v3",
2117        target_arch: Some("x86_64"),
2118
2119        cfg_feature: None,
2120        priority: 30,
2121    },
2122    TierDescriptor {
2123        name: "x64_crypto",
2124        suffix: "x64_crypto",
2125        token_path: "archmage::X64CryptoToken",
2126        as_method: "as_x64_crypto",
2127        target_arch: Some("x86_64"),
2128
2129        cfg_feature: None,
2130        priority: 25,
2131    },
2132    TierDescriptor {
2133        name: "v2",
2134        suffix: "v2",
2135        token_path: "archmage::X64V2Token",
2136        as_method: "as_x64v2",
2137        target_arch: Some("x86_64"),
2138
2139        cfg_feature: None,
2140        priority: 20,
2141    },
2142    TierDescriptor {
2143        name: "v1",
2144        suffix: "v1",
2145        token_path: "archmage::X64V1Token",
2146        as_method: "as_x64v1",
2147        target_arch: Some("x86_64"),
2148
2149        cfg_feature: None,
2150        priority: 10,
2151    },
2152    // ARM: highest to lowest
2153    TierDescriptor {
2154        name: "arm_v3",
2155        suffix: "arm_v3",
2156        token_path: "archmage::Arm64V3Token",
2157        as_method: "as_arm_v3",
2158        target_arch: Some("aarch64"),
2159
2160        cfg_feature: None,
2161        priority: 50,
2162    },
2163    TierDescriptor {
2164        name: "arm_v2",
2165        suffix: "arm_v2",
2166        token_path: "archmage::Arm64V2Token",
2167        as_method: "as_arm_v2",
2168        target_arch: Some("aarch64"),
2169
2170        cfg_feature: None,
2171        priority: 40,
2172    },
2173    TierDescriptor {
2174        name: "neon_aes",
2175        suffix: "neon_aes",
2176        token_path: "archmage::NeonAesToken",
2177        as_method: "as_neon_aes",
2178        target_arch: Some("aarch64"),
2179
2180        cfg_feature: None,
2181        priority: 30,
2182    },
2183    TierDescriptor {
2184        name: "neon_sha3",
2185        suffix: "neon_sha3",
2186        token_path: "archmage::NeonSha3Token",
2187        as_method: "as_neon_sha3",
2188        target_arch: Some("aarch64"),
2189
2190        cfg_feature: None,
2191        priority: 30,
2192    },
2193    TierDescriptor {
2194        name: "neon_crc",
2195        suffix: "neon_crc",
2196        token_path: "archmage::NeonCrcToken",
2197        as_method: "as_neon_crc",
2198        target_arch: Some("aarch64"),
2199
2200        cfg_feature: None,
2201        priority: 30,
2202    },
2203    TierDescriptor {
2204        name: "neon",
2205        suffix: "neon",
2206        token_path: "archmage::NeonToken",
2207        as_method: "as_neon",
2208        target_arch: Some("aarch64"),
2209
2210        cfg_feature: None,
2211        priority: 20,
2212    },
2213    // WASM
2214    TierDescriptor {
2215        name: "wasm128_relaxed",
2216        suffix: "wasm128_relaxed",
2217        token_path: "archmage::Wasm128RelaxedToken",
2218        as_method: "as_wasm128_relaxed",
2219        target_arch: Some("wasm32"),
2220
2221        cfg_feature: None,
2222        priority: 21,
2223    },
2224    TierDescriptor {
2225        name: "wasm128",
2226        suffix: "wasm128",
2227        token_path: "archmage::Wasm128Token",
2228        as_method: "as_wasm128",
2229        target_arch: Some("wasm32"),
2230
2231        cfg_feature: None,
2232        priority: 20,
2233    },
2234    // Scalar (always last)
2235    TierDescriptor {
2236        name: "scalar",
2237        suffix: "scalar",
2238        token_path: "archmage::ScalarToken",
2239        as_method: "as_scalar",
2240        target_arch: None,
2241
2242        cfg_feature: None,
2243        priority: 0,
2244    },
2245];
2246
/// Default tiers for all dispatch macros. Always includes v4 in the list —
/// `resolve_tiers` does not remove it; instead, when default feature gates are
/// enabled, v4 picks up its descriptor's `cfg_feature = "avx512"` gate so its
/// output is cfg'd out when the calling crate lacks that feature.
const DEFAULT_TIER_NAMES: &[&str] = &["v4", "v3", "neon", "wasm128", "scalar"];
2250
/// Whether `incant!` requires `scalar` in explicit tier lists.
/// Currently false for backwards compatibility. Flip to true in v1.0.
/// Kept as a named constant so that flip is a one-line change.
const REQUIRE_EXPLICIT_SCALAR: bool = false;
2254
2255/// Parse a comma-separated list of tier names, each optionally followed by
2256/// `(feature)` for cfg-gating: `v4(avx512), v3, neon(simd), scalar`.
2257fn parse_tier_names(input: ParseStream) -> syn::Result<Vec<String>> {
2258    let mut names = Vec::new();
2259    while !input.is_empty() {
2260        let ident: Ident = input.parse()?;
2261        let name = if input.peek(syn::token::Paren) {
2262            let paren_content;
2263            syn::parenthesized!(paren_content in input);
2264            let feat: Ident = paren_content.parse()?;
2265            format!("{}({})", ident, feat)
2266        } else {
2267            ident.to_string()
2268        };
2269        names.push(name);
2270        if input.peek(Token![,]) {
2271            let _: Token![,] = input.parse()?;
2272        }
2273    }
2274    Ok(names)
2275}
2276
/// Look up a tier descriptor by name, returning `None` for unknown names
/// (callers turn that into a compile error listing the known tiers).
fn find_tier(name: &str) -> Option<&'static TierDescriptor> {
    ALL_TIERS.iter().find(|t| t.name == name)
}
2281
/// A tier descriptor paired with its resolved feature-gate settings.
#[derive(Clone)]
struct ResolvedTier {
    tier: &'static TierDescriptor,
    /// When Some, dispatch/generation is wrapped in `#[cfg(feature = "...")]`
    /// so it's eliminated when the calling crate doesn't define the feature.
    /// Set explicitly via `v4(avx512)` syntax or implicitly from `cfg_feature`
    /// on the TierDescriptor when using default tier lists.
    feature_gate: Option<String>,
    /// When true, `#[allow(unexpected_cfgs)]` is added before the `#[cfg]`.
    /// True for implicit gates (from defaults), false for explicit `tier(feat)`.
    allow_unexpected_cfg: bool,
}
2295
2296impl core::ops::Deref for ResolvedTier {
2297    type Target = TierDescriptor;
2298    fn deref(&self) -> &TierDescriptor {
2299        self.tier
2300    }
2301}
2302
2303/// Resolve tier names to descriptors, sorted by dispatch priority (highest first).
2304/// Always appends "scalar" if not already present.
2305///
2306/// Tier names can include a feature gate: `v4(avx512)` wraps dispatch/generation
2307/// in `#[allow(unexpected_cfgs)] #[cfg(feature = "avx512")]`. Any feature name
2308/// works: `neon(simd)`, `wasm128(wasm)`, etc. Without parentheses, the tier is
2309/// unconditional.
2310///
2311/// When `default_feature_gates` is true, tiers with `cfg_feature` in their
2312/// descriptor automatically get that as their feature gate, even without explicit
2313/// `(feature)` syntax. Used for default tier lists — v4/v4x auto-get `(avx512)`.
2314fn resolve_tiers(
2315    tier_names: &[String],
2316    error_span: proc_macro2::Span,
2317    default_feature_gates: bool,
2318) -> syn::Result<Vec<ResolvedTier>> {
2319    let mut tiers = Vec::new();
2320    for raw_name in tier_names {
2321        // Parse "tier(feature)" or plain "tier"
2322        let (name, explicit_gate) = if let Some(paren_pos) = raw_name.find('(') {
2323            let tier_name = &raw_name[..paren_pos];
2324            let feat = raw_name[paren_pos + 1..].trim_end_matches(')');
2325            (tier_name, Some(feat.to_string()))
2326        } else {
2327            (raw_name.as_str(), None)
2328        };
2329        match find_tier(name) {
2330            Some(tier) => {
2331                let is_explicit = explicit_gate.is_some();
2332                let feature_gate = explicit_gate.or_else(|| {
2333                    if default_feature_gates {
2334                        tier.cfg_feature.map(String::from)
2335                    } else {
2336                        None
2337                    }
2338                });
2339                tiers.push(ResolvedTier {
2340                    tier,
2341                    allow_unexpected_cfg: feature_gate.is_some() && !is_explicit,
2342                    feature_gate,
2343                });
2344            }
2345            None => {
2346                let known: Vec<&str> = ALL_TIERS.iter().map(|t| t.name).collect();
2347                return Err(syn::Error::new(
2348                    error_span,
2349                    format!("unknown tier `{}`. Known tiers: {}", name, known.join(", ")),
2350                ));
2351            }
2352        }
2353    }
2354
2355    // Always include scalar fallback
2356    if !tiers.iter().any(|rt| rt.tier.name == "scalar") {
2357        tiers.push(ResolvedTier {
2358            tier: find_tier("scalar").unwrap(),
2359            feature_gate: None,
2360            allow_unexpected_cfg: false,
2361        });
2362    }
2363
2364    // Sort by priority (highest first) for correct dispatch order
2365    tiers.sort_by(|a, b| b.tier.priority.cmp(&a.tier.priority));
2366
2367    Ok(tiers)
2368}
2369
2370// =============================================================================
2371// incant! macro - dispatch to platform-specific variants
2372// =============================================================================
2373
/// Input for the `incant!` macro.
///
/// Grammar: `path(arg, ...) [with token_expr] [, [tier1, tier2(feature), ...]]`
struct IncantInput {
    /// Function path to call (e.g. `func` or `module::func`). The tier suffix
    /// (`_v3`, `_scalar`, ...) is appended to the last path segment.
    func_path: syn::Path,
    /// Arguments forwarded verbatim to the selected variant, after the token.
    args: Vec<syn::Expr>,
    /// Token expression from `with <expr>` — selects passthrough mode.
    /// `None` selects entry-point mode (tokens summoned at runtime).
    with_token: Option<syn::Expr>,
    /// Optional explicit tier list plus the bracket span (used for error
    /// reporting). Entries are plain names or `"tier(feature)"` strings as
    /// produced by the parser. `None` = default tiers.
    tiers: Option<(Vec<String>, proc_macro2::Span)>,
}
2385
2386/// Create a suffixed version of a function path.
2387/// e.g. `module::func` + `"v3"` → `module::func_v3`
2388fn suffix_path(path: &syn::Path, suffix: &str) -> syn::Path {
2389    let mut suffixed = path.clone();
2390    if let Some(last) = suffixed.segments.last_mut() {
2391        last.ident = format_ident!("{}_{}", last.ident, suffix);
2392    }
2393    suffixed
2394}
2395
impl Parse for IncantInput {
    /// Parses: `function_path(arg1, arg2, ...) [with token_expr] [, [tier1, tier2, ...]]`
    fn parse(input: ParseStream) -> syn::Result<Self> {
        // Parse: function_path(arg1, arg2, ...) [with token_expr] [, [tier1, tier2, ...]]
        let func_path: syn::Path = input.parse()?;

        // Parse parenthesized arguments
        let content;
        syn::parenthesized!(content in input);
        // parse_terminated tolerates a trailing comma in the argument list.
        let args = content
            .parse_terminated(syn::Expr::parse, Token![,])?
            .into_iter()
            .collect();

        // Check for optional "with token"
        // Any bare identifier at this position must be `with`; anything else
        // is rejected with a spanned error rather than silently ignored.
        let with_token = if input.peek(Ident) {
            let kw: Ident = input.parse()?;
            if kw != "with" {
                return Err(syn::Error::new_spanned(kw, "expected `with` keyword"));
            }
            Some(input.parse()?)
        } else {
            None
        };

        // Check for optional tier list: , [tier1, tier2(feature), ...]
        // tier(feature) wraps dispatch in #[cfg(feature = "feature")].
        // Example: [v4(avx512), v3, neon(simd), scalar]
        let tiers = if input.peek(Token![,]) {
            let _: Token![,] = input.parse()?;
            let bracket_content;
            let bracket = syn::bracketed!(bracket_content in input);
            let mut tier_names = Vec::new();
            while !bracket_content.is_empty() {
                let ident: Ident = bracket_content.parse()?;
                let name = if bracket_content.peek(syn::token::Paren) {
                    // Parse tier(feature) — feature gate syntax
                    let paren_content;
                    syn::parenthesized!(paren_content in bracket_content);
                    let feat: Ident = paren_content.parse()?;
                    // Encodes the gate into the name string; resolve_tiers
                    // splits it back apart on '('.
                    format!("{}({})", ident, feat)
                } else {
                    ident.to_string()
                };
                tier_names.push(name);
                // Separator comma is consumed if present but not required —
                // a missing comma between tiers is tolerated.
                if bracket_content.peek(Token![,]) {
                    let _: Token![,] = bracket_content.parse()?;
                }
            }
            // join() yields a span covering the whole `[...]` list, used for
            // error reporting in incant_impl.
            Some((tier_names, bracket.span.join()))
        } else {
            None
        };

        Ok(IncantInput {
            func_path,
            args,
            with_token,
            tiers,
        })
    }
}
2457
2458/// Dispatch to platform-specific SIMD variants.
2459///
2460/// # Entry Point Mode (no token yet)
2461///
2462/// Summons tokens and dispatches to the best available variant:
2463///
2464/// ```rust,ignore
2465/// pub fn public_api(data: &[f32]) -> f32 {
2466///     incant!(dot(data))
2467/// }
2468/// ```
2469///
2470/// Expands to runtime feature detection + dispatch to `dot_v3`, `dot_v4`,
2471/// `dot_neon`, `dot_wasm128`, or `dot_scalar`.
2472///
2473/// # Explicit Tiers
2474///
2475/// Specify which tiers to dispatch to:
2476///
2477/// ```rust,ignore
2478/// // Only dispatch to v1, v3, neon, and scalar
2479/// pub fn api(data: &[f32]) -> f32 {
2480///     incant!(process(data), [v1, v3, neon, scalar])
2481/// }
2482/// ```
2483///
2484/// Always include `scalar` in explicit tier lists — `incant!` always
2485/// emits a `fn_scalar()` call as the final fallback, and listing it
2486/// documents this dependency. Currently auto-appended if omitted;
2487/// will become a compile error in v1.0. Unknown tier names cause a
2488/// compile error. Tiers are automatically sorted into correct
2489/// dispatch order (highest priority first).
2490///
2491/// Known tiers: `v1`, `v2`, `v3`, `v4`, `v4x`, `neon`, `neon_aes`,
2492/// `neon_sha3`, `neon_crc`, `wasm128`, `wasm128_relaxed`, `scalar`.
2493///
2494/// # Passthrough Mode (already have token)
2495///
2496/// Uses compile-time dispatch via `IntoConcreteToken`:
2497///
2498/// ```rust,ignore
2499/// #[arcane]
2500/// fn outer(token: X64V3Token, data: &[f32]) -> f32 {
2501///     incant!(inner(data) with token)
2502/// }
2503/// ```
2504///
2505/// Also supports explicit tiers:
2506///
2507/// ```rust,ignore
2508/// fn inner<T: IntoConcreteToken>(token: T, data: &[f32]) -> f32 {
2509///     incant!(process(data) with token, [v3, neon, scalar])
2510/// }
2511/// ```
2512///
2513/// The compiler monomorphizes the dispatch, eliminating non-matching branches.
2514///
2515/// # Variant Naming
2516///
2517/// Functions must have suffixed variants matching the selected tiers:
2518/// - `_v1` for `X64V1Token`
2519/// - `_v2` for `X64V2Token`
2520/// - `_v3` for `X64V3Token`
2521/// - `_v4` for `X64V4Token` (requires `avx512` feature)
2522/// - `_v4x` for `X64V4xToken` (requires `avx512` feature)
2523/// - `_neon` for `NeonToken`
2524/// - `_neon_aes` for `NeonAesToken`
2525/// - `_neon_sha3` for `NeonSha3Token`
2526/// - `_neon_crc` for `NeonCrcToken`
2527/// - `_wasm128` for `Wasm128Token`
2528/// - `_scalar` for `ScalarToken`
2529#[proc_macro]
2530pub fn incant(input: TokenStream) -> TokenStream {
2531    let input = parse_macro_input!(input as IncantInput);
2532    incant_impl(input)
2533}
2534
2535/// Legacy alias for [`incant!`].
2536#[proc_macro]
2537pub fn simd_route(input: TokenStream) -> TokenStream {
2538    let input = parse_macro_input!(input as IncantInput);
2539    incant_impl(input)
2540}
2541
2542/// Descriptive alias for [`incant!`].
2543///
2544/// Dispatches to architecture-specific function variants at runtime.
2545/// Looks for suffixed functions (`_v3`, `_v4`, `_neon`, `_wasm128`, `_scalar`)
2546/// and calls the best one the CPU supports.
2547///
2548/// See [`incant!`] for full documentation and examples.
2549#[proc_macro]
2550pub fn dispatch_variant(input: TokenStream) -> TokenStream {
2551    let input = parse_macro_input!(input as IncantInput);
2552    incant_impl(input)
2553}
2554
2555fn incant_impl(input: IncantInput) -> TokenStream {
2556    let func_path = &input.func_path;
2557    let args = &input.args;
2558
2559    // Resolve tiers
2560    let tier_names: Vec<String> = match &input.tiers {
2561        Some((names, _)) => names.clone(),
2562        None => DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect(),
2563    };
2564    let last_segment_span = func_path
2565        .segments
2566        .last()
2567        .map(|s| s.ident.span())
2568        .unwrap_or_else(proc_macro2::Span::call_site);
2569    let error_span = input
2570        .tiers
2571        .as_ref()
2572        .map(|(_, span)| *span)
2573        .unwrap_or(last_segment_span);
2574
2575    // When the user specifies explicit tiers, require `scalar` in the list.
2576    // This forces acknowledgment that a scalar fallback path exists and must
2577    // be implemented. Default tiers (no bracket list) always include scalar.
2578    // TODO(v1.0): flip REQUIRE_EXPLICIT_SCALAR to true
2579    if REQUIRE_EXPLICIT_SCALAR
2580        && let Some((names, span)) = &input.tiers
2581        && !names.iter().any(|n| n == "scalar")
2582    {
2583        return syn::Error::new(
2584            *span,
2585            "explicit tier list must include `scalar`. \
2586             incant! always dispatches to fn_scalar() as the final fallback, \
2587             so `scalar` must appear in the tier list to acknowledge this. \
2588             Example: [v3, neon, scalar]",
2589        )
2590        .to_compile_error()
2591        .into();
2592    }
2593
2594    // default_optional: when using default tiers (no explicit list), tiers with
2595    // cfg_feature are automatically optional. With explicit lists, the user controls
2596    // optionality via `?` suffix (e.g., `[v4?, v3, scalar]`).
2597    let default_optional = input.tiers.is_none();
2598    let tiers = match resolve_tiers(&tier_names, error_span, default_optional) {
2599        Ok(t) => t,
2600        Err(e) => return e.to_compile_error().into(),
2601    };
2602
2603    // Group tiers by architecture for cfg-guarded blocks
2604    // Within each arch, tiers are already sorted by priority (highest first)
2605    if let Some(token_expr) = &input.with_token {
2606        gen_incant_passthrough(func_path, args, token_expr, &tiers)
2607    } else {
2608        gen_incant_entry(func_path, args, &tiers)
2609    }
2610}
2611
2612/// Generate incant! passthrough mode (already have a token).
2613fn gen_incant_passthrough(
2614    func_path: &syn::Path,
2615    args: &[syn::Expr],
2616    token_expr: &syn::Expr,
2617    tiers: &[ResolvedTier],
2618) -> TokenStream {
2619    let mut dispatch_arms = Vec::new();
2620
2621    // Group non-scalar tiers by target_arch for cfg blocks
2622    let mut arch_groups: Vec<(Option<&str>, Vec<&ResolvedTier>)> = Vec::new();
2623    for rt in tiers {
2624        if rt.name == "scalar" {
2625            continue; // Handle scalar separately at the end
2626        }
2627        if let Some(group) = arch_groups.iter_mut().find(|(a, _)| *a == rt.target_arch) {
2628            group.1.push(rt);
2629        } else {
2630            arch_groups.push((rt.target_arch, vec![rt]));
2631        }
2632    }
2633
2634    for (target_arch, group_tiers) in &arch_groups {
2635        let mut tier_checks = Vec::new();
2636        for rt in group_tiers {
2637            let fn_suffixed = suffix_path(func_path, rt.suffix);
2638            let as_method = format_ident!("{}", rt.as_method);
2639
2640            let check = quote! {
2641                if let Some(__t) = __incant_token.#as_method() {
2642                    break '__incant #fn_suffixed(__t, #(#args),*);
2643                }
2644            };
2645
2646            if let Some(feat) = &rt.feature_gate {
2647                let allow_attr = if rt.allow_unexpected_cfg {
2648                    quote! { #[allow(unexpected_cfgs)] }
2649                } else {
2650                    quote! {}
2651                };
2652                tier_checks.push(quote! {
2653                    #allow_attr
2654                    #[cfg(feature = #feat)]
2655                    { #check }
2656                });
2657            } else {
2658                tier_checks.push(check);
2659            }
2660        }
2661
2662        let inner = quote! { #(#tier_checks)* };
2663
2664        if let Some(arch) = target_arch {
2665            dispatch_arms.push(quote! {
2666                #[cfg(target_arch = #arch)]
2667                { #inner }
2668            });
2669        } else {
2670            dispatch_arms.push(inner);
2671        }
2672    }
2673
2674    // Scalar fallback (always last)
2675    let fn_scalar = suffix_path(func_path, "scalar");
2676    let scalar_arm = if tiers.iter().any(|t| t.name == "scalar") {
2677        quote! {
2678            if let Some(__t) = __incant_token.as_scalar() {
2679                break '__incant #fn_scalar(__t, #(#args),*);
2680            }
2681            unreachable!("Token did not match any known variant")
2682        }
2683    } else {
2684        quote! { unreachable!("Token did not match any known variant") }
2685    };
2686
2687    let expanded = quote! {
2688        '__incant: {
2689            use archmage::IntoConcreteToken;
2690            let __incant_token = #token_expr;
2691            #(#dispatch_arms)*
2692            #scalar_arm
2693        }
2694    };
2695    expanded.into()
2696}
2697
2698/// Generate incant! entry point mode (summon tokens).
2699fn gen_incant_entry(
2700    func_path: &syn::Path,
2701    args: &[syn::Expr],
2702    tiers: &[ResolvedTier],
2703) -> TokenStream {
2704    let mut dispatch_arms = Vec::new();
2705
2706    // Group non-scalar tiers by target_arch for cfg blocks.
2707    let mut arch_groups: Vec<(Option<&str>, Vec<&ResolvedTier>)> = Vec::new();
2708    for rt in tiers {
2709        if rt.name == "scalar" {
2710            continue;
2711        }
2712        if let Some(group) = arch_groups.iter_mut().find(|(a, _)| *a == rt.target_arch) {
2713            group.1.push(rt);
2714        } else {
2715            arch_groups.push((rt.target_arch, vec![rt]));
2716        }
2717    }
2718
2719    for (target_arch, group_tiers) in &arch_groups {
2720        let mut tier_checks = Vec::new();
2721        for rt in group_tiers {
2722            let fn_suffixed = suffix_path(func_path, rt.suffix);
2723            let token_path: syn::Path = syn::parse_str(rt.token_path).unwrap();
2724
2725            let check = quote! {
2726                if let Some(__t) = #token_path::summon() {
2727                    break '__incant #fn_suffixed(__t, #(#args),*);
2728                }
2729            };
2730
2731            if let Some(feat) = &rt.feature_gate {
2732                let allow_attr = if rt.allow_unexpected_cfg {
2733                    quote! { #[allow(unexpected_cfgs)] }
2734                } else {
2735                    quote! {}
2736                };
2737                tier_checks.push(quote! {
2738                    #allow_attr
2739                    #[cfg(feature = #feat)]
2740                    { #check }
2741                });
2742            } else {
2743                tier_checks.push(check);
2744            }
2745        }
2746
2747        let inner = quote! { #(#tier_checks)* };
2748
2749        if let Some(arch) = target_arch {
2750            dispatch_arms.push(quote! {
2751                #[cfg(target_arch = #arch)]
2752                { #inner }
2753            });
2754        } else {
2755            dispatch_arms.push(inner);
2756        }
2757    }
2758
2759    // Scalar fallback
2760    let fn_scalar = suffix_path(func_path, "scalar");
2761
2762    let expanded = quote! {
2763        '__incant: {
2764            use archmage::SimdToken;
2765            #(#dispatch_arms)*
2766            #fn_scalar(archmage::ScalarToken, #(#args),*)
2767        }
2768    };
2769    expanded.into()
2770}
2771
2772// =============================================================================
2773// autoversion - combined variant generation + dispatch
2774// =============================================================================
2775
/// Arguments to the `#[autoversion]` macro.
///
/// Parsed from the attribute argument list, which mixes three forms:
/// `_self = Type`, `cfg(feature)`, and bare tier names (optionally gated
/// as `tier(feature)`).
struct AutoversionArgs {
    /// The concrete type to use for `self` receiver (inherent methods only).
    self_type: Option<Type>,
    /// Explicit tier names (None = default tiers). Entries may carry a
    /// `"(feature)"` gate suffix as produced by the parser.
    tiers: Option<Vec<String>>,
    /// When set, emit full autoversion under `#[cfg(feature = "...")]` and a
    /// plain scalar fallback under `#[cfg(not(feature = "..."))]`. Solves the
    /// hygiene issue with `macro_rules!` wrappers.
    cfg_feature: Option<String>,
}
2787
2788impl Parse for AutoversionArgs {
2789    fn parse(input: ParseStream) -> syn::Result<Self> {
2790        let mut self_type = None;
2791        let mut tier_names = Vec::new();
2792        let mut cfg_feature = None;
2793
2794        while !input.is_empty() {
2795            let ident: Ident = input.parse()?;
2796            if ident == "_self" {
2797                let _: Token![=] = input.parse()?;
2798                self_type = Some(input.parse()?);
2799            } else if ident == "cfg" {
2800                let content;
2801                syn::parenthesized!(content in input);
2802                let feat: Ident = content.parse()?;
2803                cfg_feature = Some(feat.to_string());
2804            } else {
2805                // Treat as tier name, optionally with (feature) gate
2806                let name = if input.peek(syn::token::Paren) {
2807                    let paren_content;
2808                    syn::parenthesized!(paren_content in input);
2809                    let feat: Ident = paren_content.parse()?;
2810                    format!("{}({})", ident, feat)
2811                } else {
2812                    ident.to_string()
2813                };
2814                tier_names.push(name);
2815            }
2816            if input.peek(Token![,]) {
2817                let _: Token![,] = input.parse()?;
2818            }
2819        }
2820
2821        Ok(AutoversionArgs {
2822            self_type,
2823            tiers: if tier_names.is_empty() {
2824                None
2825            } else {
2826                Some(tier_names)
2827            },
2828            cfg_feature,
2829        })
2830    }
2831}
2832
/// Information about the `SimdToken` parameter found in a function signature.
struct SimdTokenParamInfo {
    /// Index of the parameter in `sig.inputs` (counts a `self` receiver,
    /// since the search enumerates all inputs).
    index: usize,
    /// The parameter identifier — the written name, or the synthetic
    /// `__autoversion_token` when the pattern was a `_` wildcard.
    #[allow(dead_code)]
    ident: Ident,
}
2841
2842/// Find the `SimdToken` parameter in a function signature.
2843///
2844/// Searches all typed parameters for one whose type path ends in `SimdToken`.
2845/// Returns the parameter index and identifier, or `None` if not found.
2846fn find_simd_token_param(sig: &Signature) -> Option<SimdTokenParamInfo> {
2847    for (i, arg) in sig.inputs.iter().enumerate() {
2848        if let FnArg::Typed(PatType { pat, ty, .. }) = arg
2849            && let Type::Path(type_path) = ty.as_ref()
2850            && let Some(seg) = type_path.path.segments.last()
2851            && seg.ident == "SimdToken"
2852        {
2853            let ident = match pat.as_ref() {
2854                syn::Pat::Ident(pi) => pi.ident.clone(),
2855                syn::Pat::Wild(w) => Ident::new("__autoversion_token", w.underscore_token.span),
2856                _ => continue,
2857            };
2858            return Some(SimdTokenParamInfo { index: i, ident });
2859        }
2860    }
2861    None
2862}
2863
/// Core implementation for `#[autoversion]`.
///
/// Generates suffixed SIMD variants (like `#[magetypes]`) and a runtime
/// dispatcher function (like `incant!`) from a single annotated function.
///
/// The input function must have a `SimdToken` placeholder parameter; each
/// generated variant replaces that parameter's type with a concrete token
/// type, while the dispatcher drops the parameter entirely and summons
/// tokens at runtime. Errors are reported as `compile_error!` expansions.
fn autoversion_impl(mut input_fn: LightFn, args: AutoversionArgs) -> TokenStream {
    // Check for self receiver
    let has_self = input_fn
        .sig
        .inputs
        .first()
        .is_some_and(|arg| matches!(arg, FnArg::Receiver(_)));

    // _self = Type is only needed for trait impls (nested mode in #[arcane]).
    // For inherent methods, self/Self work naturally in sibling mode.

    // Find SimdToken parameter
    let token_param = match find_simd_token_param(&input_fn.sig) {
        Some(p) => p,
        None => {
            return syn::Error::new_spanned(
                &input_fn.sig,
                "autoversion requires a `SimdToken` parameter.\n\
                 Example: fn process(token: SimdToken, data: &[f32]) -> f32 { ... }\n\n\
                 SimdToken is the dispatch placeholder — autoversion replaces it \
                 with concrete token types and generates a runtime dispatcher.",
            )
            .to_compile_error()
            .into();
        }
    };

    // Resolve tiers — autoversion always includes v4 in its defaults because it
    // generates scalar code compiled with #[target_feature], not import_intrinsics.
    // NOTE(review): this uses the same DEFAULT_TIER_NAMES as incant! — confirm
    // that list actually contains v4, since the comment above implies autoversion
    // differs from incant! here.
    let tier_names: Vec<String> = match &args.tiers {
        Some(names) => names.clone(),
        None => DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect(),
    };
    // autoversion never skips avx512 — it generates scalar code with #[target_feature]
    // (third argument `false`: no default feature gates applied to tiers).
    let tiers = match resolve_tiers(&tier_names, input_fn.sig.ident.span(), false) {
        Ok(t) => t,
        Err(e) => return e.to_compile_error().into(),
    };

    // Strip #[arcane] / #[rite] to prevent double-wrapping
    input_fn
        .attrs
        .retain(|attr| !attr.path().is_ident("arcane") && !attr.path().is_ident("rite"));

    let fn_name = &input_fn.sig.ident;
    let vis = input_fn.vis.clone();

    // Move attrs to dispatcher only; variants get no user attrs
    let fn_attrs: Vec<Attribute> = input_fn.attrs.drain(..).collect();

    // =========================================================================
    // Generate suffixed variants
    // =========================================================================
    //
    // AST manipulation only — we clone the parsed LightFn and swap the token
    // param's type annotation. No serialize/reparse round-trip. The body is
    // never touched unless _self = Type requires a `let _self = self;`
    // preamble on the scalar variant.

    let mut variants = Vec::new();

    for tier in &tiers {
        let mut variant_fn = input_fn.clone();

        // Variants are always private — only the dispatcher is public.
        variant_fn.vis = syn::Visibility::Inherited;

        // Rename: process → process_v3
        variant_fn.sig.ident = format_ident!("{}_{}", fn_name, tier.suffix);

        // Replace SimdToken param type with concrete token type
        let concrete_type: Type = syn::parse_str(tier.token_path).unwrap();
        if let FnArg::Typed(pt) = &mut variant_fn.sig.inputs[token_param.index] {
            *pt.ty = concrete_type;
        }

        // Scalar with _self = Type: inject `let _self = self;` preamble so body's
        // _self references resolve (non-scalar variants get this from #[arcane(_self = Type)])
        if tier.name == "scalar" && has_self && args.self_type.is_some() {
            let original_body = variant_fn.body.clone();
            variant_fn.body = quote!(let _self = self; #original_body);
        }

        // cfg guard: arch + optional feature gate from tier(feature) syntax
        let allow_attr = if tier.allow_unexpected_cfg {
            quote! { #[allow(unexpected_cfgs)] }
        } else {
            quote! {}
        };
        let cfg_guard = match (tier.target_arch, &tier.feature_gate) {
            (Some(arch), Some(feat)) => quote! {
                #[cfg(target_arch = #arch)]
                #allow_attr
                #[cfg(feature = #feat)]
            },
            (Some(arch), None) => quote! { #[cfg(target_arch = #arch)] },
            (None, Some(feat)) => quote! {
                #allow_attr
                #[cfg(feature = #feat)]
            },
            (None, None) => quote! {},
        };

        // All variants are private implementation details of the dispatcher.
        // Suppress dead_code: if the dispatcher is unused, rustc warns on IT
        // (via quote_spanned! with the user's span). Warning on individual
        // variants would be confusing — the user didn't write _scalar or _v3.
        if tier.name != "scalar" {
            // Non-scalar variants are wrapped in #[arcane] so they get the
            // tier's #[target_feature] treatment.
            let arcane_attr = if let Some(ref self_type) = args.self_type {
                quote! { #[archmage::arcane(_self = #self_type)] }
            } else {
                quote! { #[archmage::arcane] }
            };
            variants.push(quote! {
                #cfg_guard
                #[allow(dead_code)]
                #arcane_attr
                #variant_fn
            });
        } else {
            variants.push(quote! {
                #cfg_guard
                #[allow(dead_code)]
                #variant_fn
            });
        }
    }

    // =========================================================================
    // Generate dispatcher (adapted from gen_incant_entry)
    // =========================================================================

    // Build dispatcher inputs: original params minus SimdToken
    let mut dispatcher_inputs: Vec<FnArg> = input_fn.sig.inputs.iter().cloned().collect();
    dispatcher_inputs.remove(token_param.index);

    // Rename wildcard params so we can pass them as arguments
    let mut wild_counter = 0u32;
    for arg in &mut dispatcher_inputs {
        if let FnArg::Typed(pat_type) = arg
            && matches!(pat_type.pat.as_ref(), syn::Pat::Wild(_))
        {
            let ident = format_ident!("__autoversion_wild_{}", wild_counter);
            wild_counter += 1;
            *pat_type.pat = syn::Pat::Ident(syn::PatIdent {
                attrs: vec![],
                by_ref: None,
                mutability: None,
                ident,
                subpat: None,
            });
        }
    }

    // Collect argument idents for dispatch calls (exclude self receiver)
    // Note: non-ident patterns (tuples, structs) are silently skipped here —
    // after the wildcard rename above, only such destructuring patterns fall
    // through the filter.
    let dispatch_args: Vec<Ident> = dispatcher_inputs
        .iter()
        .filter_map(|arg| {
            if let FnArg::Typed(PatType { pat, .. }) = arg
                && let syn::Pat::Ident(pi) = pat.as_ref()
            {
                return Some(pi.ident.clone());
            }
            None
        })
        .collect();

    // Build turbofish for forwarding type/const generics to variant calls
    let turbofish = build_turbofish(&input_fn.sig.generics);

    // Group non-scalar tiers by target_arch for cfg blocks
    let mut arch_groups: Vec<(Option<&str>, Vec<&ResolvedTier>)> = Vec::new();
    for tier in &tiers {
        if tier.name == "scalar" {
            continue;
        }
        if let Some(group) = arch_groups.iter_mut().find(|(a, _)| *a == tier.target_arch) {
            group.1.push(tier);
        } else {
            arch_groups.push((tier.target_arch, vec![tier]));
        }
    }

    let mut dispatch_arms = Vec::new();
    for (target_arch, group_tiers) in &arch_groups {
        let mut tier_checks = Vec::new();
        for rt in group_tiers {
            let suffixed = format_ident!("{}_{}", fn_name, rt.suffix);
            let token_path: syn::Path = syn::parse_str(rt.token_path).unwrap();

            // Method call when the original had a self receiver, free call
            // otherwise.
            let call = if has_self {
                quote! { self.#suffixed #turbofish(__t, #(#dispatch_args),*) }
            } else {
                quote! { #suffixed #turbofish(__t, #(#dispatch_args),*) }
            };

            let check = quote! {
                if let Some(__t) = #token_path::summon() {
                    return #call;
                }
            };

            if let Some(feat) = &rt.feature_gate {
                let allow_attr = if rt.allow_unexpected_cfg {
                    quote! { #[allow(unexpected_cfgs)] }
                } else {
                    quote! {}
                };
                tier_checks.push(quote! {
                    #allow_attr
                    #[cfg(feature = #feat)]
                    { #check }
                });
            } else {
                tier_checks.push(check);
            }
        }

        let inner = quote! { #(#tier_checks)* };

        if let Some(arch) = target_arch {
            dispatch_arms.push(quote! {
                #[cfg(target_arch = #arch)]
                { #inner }
            });
        } else {
            dispatch_arms.push(inner);
        }
    }

    // Scalar fallback (always available, no summon needed)
    let scalar_name = format_ident!("{}_scalar", fn_name);
    let scalar_call = if has_self {
        quote! { self.#scalar_name #turbofish(archmage::ScalarToken, #(#dispatch_args),*) }
    } else {
        quote! { #scalar_name #turbofish(archmage::ScalarToken, #(#dispatch_args),*) }
    };

    // Build dispatcher function
    // Re-punctuate the arg list so it can be spliced into the generated
    // signature with commas.
    let dispatcher_inputs_punct: syn::punctuated::Punctuated<FnArg, Token![,]> =
        dispatcher_inputs.into_iter().collect();
    let output = &input_fn.sig.output;
    let generics = &input_fn.sig.generics;
    let where_clause = &generics.where_clause;

    // Use the user's span for the dispatcher so dead_code lint fires on the
    // function the user actually wrote, not on invisible generated variants.
    let user_span = fn_name.span();

    // autoversion uses `return` instead of `break '__dispatch` — no labeled block
    // needed. This avoids label hygiene issues when #[autoversion] is applied inside
    // macro_rules! (labels from proc macros can't be seen from macro_rules! contexts).
    let dispatcher = if let Some(ref feat) = args.cfg_feature {
        // cfg(feature): full dispatch when on, scalar-only when off
        quote_spanned! { user_span =>
            #[cfg(feature = #feat)]
            #(#fn_attrs)*
            #vis fn #fn_name #generics (#dispatcher_inputs_punct) #output #where_clause {
                use archmage::SimdToken;
                #(#dispatch_arms)*
                #scalar_call
            }

            #[cfg(not(feature = #feat))]
            #(#fn_attrs)*
            #vis fn #fn_name #generics (#dispatcher_inputs_punct) #output #where_clause {
                #scalar_call
            }
        }
    } else {
        quote_spanned! { user_span =>
            #(#fn_attrs)*
            #vis fn #fn_name #generics (#dispatcher_inputs_punct) #output #where_clause {
                use archmage::SimdToken;
                #(#dispatch_arms)*
                #scalar_call
            }
        }
    };

    // Final expansion: the public dispatcher followed by all private variants.
    let expanded = quote! {
        #dispatcher
        #(#variants)*
    };

    expanded.into()
}
3155
3156/// Let the compiler auto-vectorize scalar code for each architecture.
3157///
3158/// Write a plain scalar function with a `SimdToken` placeholder parameter.
3159/// `#[autoversion]` generates architecture-specific copies — each compiled
3160/// with different `#[target_feature]` flags via `#[arcane]` — plus a runtime
3161/// dispatcher that calls the best one the CPU supports.
3162///
3163/// You don't touch intrinsics, don't import SIMD types, don't think about
3164/// lane widths. The compiler's auto-vectorizer does the work; you give it
3165/// permission via `#[target_feature]`, which `#[autoversion]` handles.
3166///
3167/// # The simple win
3168///
3169/// ```rust,ignore
3170/// use archmage::SimdToken;
3171///
3172/// #[autoversion]
3173/// fn sum_of_squares(_token: SimdToken, data: &[f32]) -> f32 {
3174///     let mut sum = 0.0f32;
3175///     for &x in data {
3176///         sum += x * x;
3177///     }
3178///     sum
3179/// }
3180///
3181/// // Call directly — no token, no unsafe:
3182/// let result = sum_of_squares(&my_data);
3183/// ```
3184///
3185/// The `_token` parameter is never used in the body. It exists so the macro
3186/// knows where to substitute concrete token types. Each generated variant
3187/// gets `#[arcane]` → `#[target_feature(enable = "avx2,fma,...")]`, which
3188/// unlocks the compiler's auto-vectorizer for that feature set.
3189///
3190/// On x86-64 with the `_v3` variant (AVX2+FMA), that loop compiles to
3191/// `vfmadd231ps` — fused multiply-add on 8 floats per cycle. On aarch64
3192/// with NEON, you get `fmla`. The `_scalar` fallback compiles without any
3193/// SIMD target features, as a safety net for unknown hardware.
3194///
3195/// # Chunks + remainder
3196///
3197/// The classic data-processing pattern works naturally:
3198///
3199/// ```rust,ignore
3200/// #[autoversion]
3201/// fn normalize(_token: SimdToken, data: &mut [f32], scale: f32) {
3202///     // Compiler auto-vectorizes this — no manual SIMD needed.
3203///     // On v3, this becomes vdivps + vmulps on 8 floats at a time.
3204///     for x in data.iter_mut() {
3205///         *x = (*x - 128.0) * scale;
3206///     }
3207/// }
3208/// ```
3209///
3210/// If you want explicit control over chunk boundaries (e.g., for
3211/// accumulator patterns), that works too:
3212///
3213/// ```rust,ignore
3214/// #[autoversion]
3215/// fn dot_product(_token: SimdToken, a: &[f32], b: &[f32]) -> f32 {
3216///     let n = a.len().min(b.len());
3217///     let mut sum = 0.0f32;
3218///     for i in 0..n {
3219///         sum += a[i] * b[i];
3220///     }
3221///     sum
3222/// }
3223/// ```
3224///
3225/// The compiler decides the chunk size based on the target features of each
3226/// variant (8 floats for AVX2, 4 for NEON, 1 for scalar).
3227///
3228/// # What gets generated
3229///
3230/// With default tiers, `#[autoversion] fn process(_t: SimdToken, data: &[f32]) -> f32`
3231/// expands to:
3232///
3233/// - `process_v4(token: X64V4Token, ...)` — AVX-512 (behind `#[cfg(feature = "avx512")]`)
3234/// - `process_v3(token: X64V3Token, ...)` — AVX2+FMA
3235/// - `process_neon(token: NeonToken, ...)` — aarch64 NEON
3236/// - `process_wasm128(token: Wasm128Token, ...)` — WASM SIMD
3237/// - `process_scalar(token: ScalarToken, ...)` — no SIMD, always available
3238/// - `process(data: &[f32]) -> f32` — **dispatcher** (SimdToken param removed)
3239///
3240/// Each non-scalar variant is wrapped in `#[arcane]` (for `#[target_feature]`)
3241/// and `#[cfg(target_arch = ...)]`. The dispatcher does runtime CPU feature
3242/// detection via `Token::summon()` and calls the best match. When compiled
3243/// with `-C target-cpu=native`, the detection is elided by the compiler.
3244///
3245/// The suffixed variants are private sibling functions — only the dispatcher
3246/// is public. Within the same module, you can call them directly for testing
3247/// or benchmarking.
3248///
3249/// # SimdToken replacement
3250///
3251/// `#[autoversion]` replaces the `SimdToken` type annotation in the function
3252/// signature with the concrete token type for each variant (e.g.,
3253/// `archmage::X64V3Token`). Only the parameter's type changes — the function
3254/// body is never reparsed, which keeps compile times low.
3255///
3256/// The token variable (whatever you named it — `token`, `_token`, `_t`)
3257/// keeps working in the body because its type comes from the signature.
3258/// So `f32x8::from_array(token, ...)` works — `token` is now an `X64V3Token`
3259/// which satisfies the same trait bounds as `SimdToken`.
3260///
3261/// `#[magetypes]` takes a different approach: it replaces the text `Token`
3262/// everywhere in the function — signature and body — via string substitution.
3263/// Use `#[magetypes]` when you need body-level type substitution (e.g.,
3264/// `Token`-dependent constants or type aliases that differ per variant).
3265/// Use `#[autoversion]` when you want compiler auto-vectorization of scalar
3266/// code with zero boilerplate.
3267///
3268/// # Benchmarking
3269///
3270/// Measure the speedup with a side-by-side comparison. The generated
3271/// `_scalar` variant serves as the baseline; the dispatcher picks the
3272/// best available:
3273///
3274/// ```rust,ignore
3275/// use criterion::{Criterion, black_box, criterion_group, criterion_main};
3276/// use archmage::SimdToken;
3277///
3278/// #[autoversion]
3279/// fn sum_squares(_token: SimdToken, data: &[f32]) -> f32 {
3280///     data.iter().map(|&x| x * x).fold(0.0f32, |a, b| a + b)
3281/// }
3282///
3283/// fn bench(c: &mut Criterion) {
3284///     let data: Vec<f32> = (0..4096).map(|i| i as f32 * 0.01).collect();
3285///     let mut group = c.benchmark_group("sum_squares");
3286///
3287///     // Dispatched — picks best available at runtime
3288///     group.bench_function("dispatched", |b| {
3289///         b.iter(|| sum_squares(black_box(&data)))
3290///     });
3291///
3292///     // Scalar baseline — no target_feature, no auto-vectorization
3293///     group.bench_function("scalar", |b| {
3294///         b.iter(|| sum_squares_scalar(archmage::ScalarToken, black_box(&data)))
3295///     });
3296///
3297///     // Specific tier (useful for isolating which tier wins)
3298///     #[cfg(target_arch = "x86_64")]
3299///     if let Some(t) = archmage::X64V3Token::summon() {
3300///         group.bench_function("v3_avx2_fma", |b| {
3301///             b.iter(|| sum_squares_v3(t, black_box(&data)));
3302///         });
3303///     }
3304///
3305///     group.finish();
3306/// }
3307///
3308/// criterion_group!(benches, bench);
3309/// criterion_main!(benches);
3310/// ```
3311///
3312/// For a tight numeric loop on x86-64, the `_v3` variant (AVX2+FMA)
3313/// typically runs 4-8x faster than `_scalar` because `#[target_feature]`
3314/// unlocks auto-vectorization that the baseline build can't use.
3315///
3316/// # Explicit tiers
3317///
3318/// ```rust,ignore
3319/// #[autoversion(v3, v4, v4x, neon, arm_v2, wasm128)]
3320/// fn process(_token: SimdToken, data: &[f32]) -> f32 {
3321///     // ...
3322/// }
3323/// ```
3324///
3325/// `scalar` is always included implicitly.
3326///
3327/// Default tiers (when no list given): `v4`, `v3`, `neon`, `wasm128`, `scalar`.
3328///
3329/// Known tiers: `v1`, `v2`, `v3`, `v3_crypto`, `v4`, `v4x`, `neon`,
3330/// `neon_aes`, `neon_sha3`, `neon_crc`, `arm_v2`, `arm_v3`, `wasm128`,
3331/// `wasm128_relaxed`, `x64_crypto`, `scalar`.
3332///
3333/// # Methods with self receivers
3334///
3335/// For inherent methods, `self` works naturally — no `_self` needed:
3336///
3337/// ```rust,ignore
3338/// impl ImageBuffer {
3339///     #[autoversion]
3340///     fn normalize(&mut self, token: SimdToken, gamma: f32) {
3341///         for pixel in &mut self.data {
3342///             *pixel = (*pixel / 255.0).powf(gamma);
3343///         }
3344///     }
3345/// }
3346///
3347/// // Call normally — no token:
3348/// buffer.normalize(2.2);
3349/// ```
3350///
3351/// All receiver types work: `self`, `&self`, `&mut self`. Non-scalar variants
3352/// get `#[arcane]` (sibling mode), where `self`/`Self` resolve naturally.
3353///
3354/// # Trait methods (requires `_self = Type`)
3355///
3356/// Trait methods can't use `#[autoversion]` directly because proc macro
3357/// attributes on trait impl items can't expand to multiple sibling functions.
3358/// Use the delegation pattern with `_self = Type`:
3359///
3360/// ```rust,ignore
3361/// trait Processor {
3362///     fn process(&self, data: &[f32]) -> f32;
3363/// }
3364///
3365/// impl Processor for MyType {
3366///     fn process(&self, data: &[f32]) -> f32 {
3367///         self.process_impl(data) // delegate to autoversioned method
3368///     }
3369/// }
3370///
3371/// impl MyType {
3372///     #[autoversion(_self = MyType)]
3373///     fn process_impl(&self, token: SimdToken, data: &[f32]) -> f32 {
3374///         _self.weights.iter().zip(data).map(|(w, d)| w * d).sum()
3375///     }
3376/// }
3377/// ```
3378///
3379/// `_self = Type` uses nested mode in `#[arcane]`, which is required for
3380/// trait impls. Use `_self` (not `self`) in the body when using this form.
3381///
3382/// # Comparison with `#[magetypes]` + `incant!`
3383///
3384/// | | `#[autoversion]` | `#[magetypes]` + `incant!` |
3385/// |---|---|---|
3386/// | Placeholder | `SimdToken` | `Token` |
3387/// | Generates variants | Yes | Yes (magetypes) |
3388/// | Generates dispatcher | Yes | No (you write `incant!`) |
3389/// | Best for | Scalar auto-vectorization | Explicit SIMD with typed vectors |
3390/// | Lines of code | 1 attribute | 2+ (magetypes + incant + arcane) |
3391///
3392/// Use `#[autoversion]` for scalar loops you want auto-vectorized. Use
3393/// `#[magetypes]` + `incant!` when you need `f32x8`, `u8x32`, and
/// hand-tuned SIMD code per architecture.
3395#[proc_macro_attribute]
3396pub fn autoversion(attr: TokenStream, item: TokenStream) -> TokenStream {
3397    let args = parse_macro_input!(attr as AutoversionArgs);
3398    let input_fn = parse_macro_input!(item as LightFn);
3399    autoversion_impl(input_fn, args)
3400}
3401
3402// =============================================================================
3403// Unit tests for token/trait recognition maps
3404// =============================================================================
3405
3406#[cfg(test)]
3407mod tests {
3408    use super::*;
3409
3410    use super::generated::{ALL_CONCRETE_TOKENS, ALL_TRAIT_NAMES};
3411    use syn::{ItemFn, ReturnType};
3412
3413    #[test]
3414    fn every_concrete_token_is_in_token_to_features() {
3415        for &name in ALL_CONCRETE_TOKENS {
3416            assert!(
3417                token_to_features(name).is_some(),
3418                "Token `{}` exists in runtime crate but is NOT recognized by \
3419                 token_to_features() in the proc macro. Add it!",
3420                name
3421            );
3422        }
3423    }
3424
3425    #[test]
3426    fn every_trait_is_in_trait_to_features() {
3427        for &name in ALL_TRAIT_NAMES {
3428            assert!(
3429                trait_to_features(name).is_some(),
3430                "Trait `{}` exists in runtime crate but is NOT recognized by \
3431                 trait_to_features() in the proc macro. Add it!",
3432                name
3433            );
3434        }
3435    }
3436
3437    #[test]
3438    fn token_aliases_map_to_same_features() {
3439        // Desktop64 = X64V3Token
3440        assert_eq!(
3441            token_to_features("Desktop64"),
3442            token_to_features("X64V3Token"),
3443            "Desktop64 and X64V3Token should map to identical features"
3444        );
3445
3446        // Server64 = X64V4Token = Avx512Token
3447        assert_eq!(
3448            token_to_features("Server64"),
3449            token_to_features("X64V4Token"),
3450            "Server64 and X64V4Token should map to identical features"
3451        );
3452        assert_eq!(
3453            token_to_features("X64V4Token"),
3454            token_to_features("Avx512Token"),
3455            "X64V4Token and Avx512Token should map to identical features"
3456        );
3457
3458        // Arm64 = NeonToken
3459        assert_eq!(
3460            token_to_features("Arm64"),
3461            token_to_features("NeonToken"),
3462            "Arm64 and NeonToken should map to identical features"
3463        );
3464    }
3465
3466    #[test]
3467    fn trait_to_features_includes_tokens_as_bounds() {
3468        // Tier tokens should also work as trait bounds
3469        // (for `impl X64V3Token` patterns, even though Rust won't allow it,
3470        // the macro processes AST before type checking)
3471        let tier_tokens = [
3472            "X64V2Token",
3473            "X64CryptoToken",
3474            "X64V3Token",
3475            "Desktop64",
3476            "Avx2FmaToken",
3477            "X64V4Token",
3478            "Avx512Token",
3479            "Server64",
3480            "X64V4xToken",
3481            "Avx512Fp16Token",
3482            "NeonToken",
3483            "Arm64",
3484            "NeonAesToken",
3485            "NeonSha3Token",
3486            "NeonCrcToken",
3487            "Arm64V2Token",
3488            "Arm64V3Token",
3489        ];
3490
3491        for &name in &tier_tokens {
3492            assert!(
3493                trait_to_features(name).is_some(),
3494                "Tier token `{}` should also be recognized in trait_to_features() \
3495                 for use as a generic bound. Add it!",
3496                name
3497            );
3498        }
3499    }
3500
3501    #[test]
3502    fn trait_features_are_cumulative() {
3503        // HasX64V4 should include all HasX64V2 features plus more
3504        let v2_features = trait_to_features("HasX64V2").unwrap();
3505        let v4_features = trait_to_features("HasX64V4").unwrap();
3506
3507        for &f in v2_features {
3508            assert!(
3509                v4_features.contains(&f),
3510                "HasX64V4 should include v2 feature `{}` but doesn't",
3511                f
3512            );
3513        }
3514
3515        // v4 should have more features than v2
3516        assert!(
3517            v4_features.len() > v2_features.len(),
3518            "HasX64V4 should have more features than HasX64V2"
3519        );
3520    }
3521
3522    #[test]
3523    fn x64v3_trait_features_include_v2() {
3524        // X64V3Token as trait bound should include v2 features
3525        let v2 = trait_to_features("HasX64V2").unwrap();
3526        let v3 = trait_to_features("X64V3Token").unwrap();
3527
3528        for &f in v2 {
3529            assert!(
3530                v3.contains(&f),
3531                "X64V3Token trait features should include v2 feature `{}` but don't",
3532                f
3533            );
3534        }
3535    }
3536
3537    #[test]
3538    fn has_neon_aes_includes_neon() {
3539        let neon = trait_to_features("HasNeon").unwrap();
3540        let neon_aes = trait_to_features("HasNeonAes").unwrap();
3541
3542        for &f in neon {
3543            assert!(
3544                neon_aes.contains(&f),
3545                "HasNeonAes should include NEON feature `{}`",
3546                f
3547            );
3548        }
3549    }
3550
3551    #[test]
3552    fn no_removed_traits_are_recognized() {
3553        // These traits were removed in 0.3.0 and should NOT be recognized
3554        let removed = [
3555            "HasSse",
3556            "HasSse2",
3557            "HasSse41",
3558            "HasSse42",
3559            "HasAvx",
3560            "HasAvx2",
3561            "HasFma",
3562            "HasAvx512f",
3563            "HasAvx512bw",
3564            "HasAvx512vl",
3565            "HasAvx512vbmi2",
3566            "HasSve",
3567            "HasSve2",
3568        ];
3569
3570        for &name in &removed {
3571            assert!(
3572                trait_to_features(name).is_none(),
3573                "Removed trait `{}` should NOT be in trait_to_features(). \
3574                 It was removed in 0.3.0 — users should migrate to tier traits.",
3575                name
3576            );
3577        }
3578    }
3579
3580    #[test]
3581    fn no_nonexistent_tokens_are_recognized() {
3582        // These tokens don't exist and should NOT be recognized
3583        let fake = [
3584            "SveToken",
3585            "Sve2Token",
3586            "Avx512VnniToken",
3587            "X64V4ModernToken",
3588            "NeonFp16Token",
3589        ];
3590
3591        for &name in &fake {
3592            assert!(
3593                token_to_features(name).is_none(),
3594                "Non-existent token `{}` should NOT be in token_to_features()",
3595                name
3596            );
3597        }
3598    }
3599
3600    #[test]
3601    fn featureless_traits_are_not_in_registries() {
3602        // SimdToken and IntoConcreteToken should NOT be in any feature registry
3603        // because they don't map to CPU features
3604        for &name in FEATURELESS_TRAIT_NAMES {
3605            assert!(
3606                token_to_features(name).is_none(),
3607                "`{}` should NOT be in token_to_features() — it has no CPU features",
3608                name
3609            );
3610            assert!(
3611                trait_to_features(name).is_none(),
3612                "`{}` should NOT be in trait_to_features() — it has no CPU features",
3613                name
3614            );
3615        }
3616    }
3617
3618    #[test]
3619    fn find_featureless_trait_detects_simdtoken() {
3620        let names = vec!["SimdToken".to_string()];
3621        assert_eq!(find_featureless_trait(&names), Some("SimdToken"));
3622
3623        let names = vec!["IntoConcreteToken".to_string()];
3624        assert_eq!(find_featureless_trait(&names), Some("IntoConcreteToken"));
3625
3626        // Feature-bearing traits should NOT be detected
3627        let names = vec!["HasX64V2".to_string()];
3628        assert_eq!(find_featureless_trait(&names), None);
3629
3630        let names = vec!["HasNeon".to_string()];
3631        assert_eq!(find_featureless_trait(&names), None);
3632
3633        // Mixed: if SimdToken is among real traits, still detected
3634        let names = vec!["SimdToken".to_string(), "HasX64V2".to_string()];
3635        assert_eq!(find_featureless_trait(&names), Some("SimdToken"));
3636    }
3637
3638    #[test]
3639    fn arm64_v2_v3_traits_are_cumulative() {
3640        let v2_features = trait_to_features("HasArm64V2").unwrap();
3641        let v3_features = trait_to_features("HasArm64V3").unwrap();
3642
3643        for &f in v2_features {
3644            assert!(
3645                v3_features.contains(&f),
3646                "HasArm64V3 should include v2 feature `{}` but doesn't",
3647                f
3648            );
3649        }
3650
3651        assert!(
3652            v3_features.len() > v2_features.len(),
3653            "HasArm64V3 should have more features than HasArm64V2"
3654        );
3655    }
3656
3657    // =========================================================================
3658    // autoversion — argument parsing
3659    // =========================================================================
3660
3661    #[test]
3662    fn autoversion_args_empty() {
3663        let args: AutoversionArgs = syn::parse_str("").unwrap();
3664        assert!(args.self_type.is_none());
3665        assert!(args.tiers.is_none());
3666    }
3667
3668    #[test]
3669    fn autoversion_args_single_tier() {
3670        let args: AutoversionArgs = syn::parse_str("v3").unwrap();
3671        assert!(args.self_type.is_none());
3672        assert_eq!(args.tiers.as_ref().unwrap(), &["v3"]);
3673    }
3674
3675    #[test]
3676    fn autoversion_args_tiers_only() {
3677        let args: AutoversionArgs = syn::parse_str("v3, v4, neon").unwrap();
3678        assert!(args.self_type.is_none());
3679        let tiers = args.tiers.unwrap();
3680        assert_eq!(tiers, vec!["v3", "v4", "neon"]);
3681    }
3682
3683    #[test]
3684    fn autoversion_args_many_tiers() {
3685        let args: AutoversionArgs =
3686            syn::parse_str("v1, v2, v3, v4, v4x, neon, arm_v2, wasm128").unwrap();
3687        assert_eq!(
3688            args.tiers.unwrap(),
3689            vec!["v1", "v2", "v3", "v4", "v4x", "neon", "arm_v2", "wasm128"]
3690        );
3691    }
3692
3693    #[test]
3694    fn autoversion_args_trailing_comma() {
3695        let args: AutoversionArgs = syn::parse_str("v3, v4,").unwrap();
3696        assert_eq!(args.tiers.as_ref().unwrap(), &["v3", "v4"]);
3697    }
3698
3699    #[test]
3700    fn autoversion_args_self_only() {
3701        let args: AutoversionArgs = syn::parse_str("_self = MyType").unwrap();
3702        assert!(args.self_type.is_some());
3703        assert!(args.tiers.is_none());
3704    }
3705
3706    #[test]
3707    fn autoversion_args_self_and_tiers() {
3708        let args: AutoversionArgs = syn::parse_str("_self = MyType, v3, neon").unwrap();
3709        assert!(args.self_type.is_some());
3710        let tiers = args.tiers.unwrap();
3711        assert_eq!(tiers, vec!["v3", "neon"]);
3712    }
3713
3714    #[test]
3715    fn autoversion_args_tiers_then_self() {
3716        // _self can appear after tier names
3717        let args: AutoversionArgs = syn::parse_str("v3, neon, _self = MyType").unwrap();
3718        assert!(args.self_type.is_some());
3719        let tiers = args.tiers.unwrap();
3720        assert_eq!(tiers, vec!["v3", "neon"]);
3721    }
3722
3723    #[test]
3724    fn autoversion_args_self_with_path_type() {
3725        let args: AutoversionArgs = syn::parse_str("_self = crate::MyType").unwrap();
3726        assert!(args.self_type.is_some());
3727        assert!(args.tiers.is_none());
3728    }
3729
3730    #[test]
3731    fn autoversion_args_self_with_generic_type() {
3732        let args: AutoversionArgs = syn::parse_str("_self = Vec<u8>").unwrap();
3733        assert!(args.self_type.is_some());
3734        let ty_str = args.self_type.unwrap().to_token_stream().to_string();
3735        assert!(ty_str.contains("Vec"), "Expected Vec<u8>, got: {}", ty_str);
3736    }
3737
3738    #[test]
3739    fn autoversion_args_self_trailing_comma() {
3740        let args: AutoversionArgs = syn::parse_str("_self = MyType,").unwrap();
3741        assert!(args.self_type.is_some());
3742        assert!(args.tiers.is_none());
3743    }
3744
3745    // =========================================================================
3746    // autoversion — find_simd_token_param
3747    // =========================================================================
3748
3749    #[test]
3750    fn find_simd_token_param_first_position() {
3751        let f: ItemFn =
3752            syn::parse_str("fn process(token: SimdToken, data: &[f32]) -> f32 {}").unwrap();
3753        let param = find_simd_token_param(&f.sig).unwrap();
3754        assert_eq!(param.index, 0);
3755        assert_eq!(param.ident, "token");
3756    }
3757
3758    #[test]
3759    fn find_simd_token_param_second_position() {
3760        let f: ItemFn =
3761            syn::parse_str("fn process(data: &[f32], token: SimdToken) -> f32 {}").unwrap();
3762        let param = find_simd_token_param(&f.sig).unwrap();
3763        assert_eq!(param.index, 1);
3764        assert_eq!(param.ident, "token");
3765    }
3766
3767    #[test]
3768    fn find_simd_token_param_underscore_prefix() {
3769        let f: ItemFn =
3770            syn::parse_str("fn process(_token: SimdToken, data: &[f32]) -> f32 {}").unwrap();
3771        let param = find_simd_token_param(&f.sig).unwrap();
3772        assert_eq!(param.index, 0);
3773        assert_eq!(param.ident, "_token");
3774    }
3775
3776    #[test]
3777    fn find_simd_token_param_wildcard() {
3778        let f: ItemFn = syn::parse_str("fn process(_: SimdToken, data: &[f32]) -> f32 {}").unwrap();
3779        let param = find_simd_token_param(&f.sig).unwrap();
3780        assert_eq!(param.index, 0);
3781        assert_eq!(param.ident, "__autoversion_token");
3782    }
3783
3784    #[test]
3785    fn find_simd_token_param_not_found() {
3786        let f: ItemFn = syn::parse_str("fn process(data: &[f32]) -> f32 {}").unwrap();
3787        assert!(find_simd_token_param(&f.sig).is_none());
3788    }
3789
3790    #[test]
3791    fn find_simd_token_param_no_params() {
3792        let f: ItemFn = syn::parse_str("fn process() {}").unwrap();
3793        assert!(find_simd_token_param(&f.sig).is_none());
3794    }
3795
3796    #[test]
3797    fn find_simd_token_param_concrete_token_not_matched() {
3798        // autoversion looks specifically for SimdToken, not concrete tokens
3799        let f: ItemFn =
3800            syn::parse_str("fn process(token: X64V3Token, data: &[f32]) -> f32 {}").unwrap();
3801        assert!(find_simd_token_param(&f.sig).is_none());
3802    }
3803
3804    #[test]
3805    fn find_simd_token_param_scalar_token_not_matched() {
3806        let f: ItemFn =
3807            syn::parse_str("fn process(token: ScalarToken, data: &[f32]) -> f32 {}").unwrap();
3808        assert!(find_simd_token_param(&f.sig).is_none());
3809    }
3810
3811    #[test]
3812    fn find_simd_token_param_among_many() {
3813        let f: ItemFn = syn::parse_str(
3814            "fn process(a: i32, b: f64, token: SimdToken, c: &str, d: bool) -> f32 {}",
3815        )
3816        .unwrap();
3817        let param = find_simd_token_param(&f.sig).unwrap();
3818        assert_eq!(param.index, 2);
3819        assert_eq!(param.ident, "token");
3820    }
3821
3822    #[test]
3823    fn find_simd_token_param_with_generics() {
3824        let f: ItemFn =
3825            syn::parse_str("fn process<T: Clone>(token: SimdToken, data: &[T]) -> T {}").unwrap();
3826        let param = find_simd_token_param(&f.sig).unwrap();
3827        assert_eq!(param.index, 0);
3828        assert_eq!(param.ident, "token");
3829    }
3830
3831    #[test]
3832    fn find_simd_token_param_with_where_clause() {
3833        let f: ItemFn = syn::parse_str(
3834            "fn process<T>(token: SimdToken, data: &[T]) -> T where T: Copy + Default {}",
3835        )
3836        .unwrap();
3837        let param = find_simd_token_param(&f.sig).unwrap();
3838        assert_eq!(param.index, 0);
3839    }
3840
3841    #[test]
3842    fn find_simd_token_param_with_lifetime() {
3843        let f: ItemFn =
3844            syn::parse_str("fn process<'a>(token: SimdToken, data: &'a [f32]) -> &'a f32 {}")
3845                .unwrap();
3846        let param = find_simd_token_param(&f.sig).unwrap();
3847        assert_eq!(param.index, 0);
3848    }
3849
3850    // =========================================================================
3851    // autoversion — tier resolution
3852    // =========================================================================
3853
3854    #[test]
3855    fn autoversion_default_tiers_all_resolve() {
3856        let names: Vec<String> = DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect();
3857        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site(), false).unwrap();
3858        assert!(!tiers.is_empty());
3859        // scalar should be present
3860        assert!(tiers.iter().any(|t| t.name == "scalar"));
3861    }
3862
3863    #[test]
3864    fn autoversion_scalar_always_appended() {
3865        let names = vec!["v3".to_string(), "neon".to_string()];
3866        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site(), false).unwrap();
3867        assert!(
3868            tiers.iter().any(|t| t.name == "scalar"),
3869            "scalar must be auto-appended"
3870        );
3871    }
3872
3873    #[test]
3874    fn autoversion_scalar_not_duplicated() {
3875        let names = vec!["v3".to_string(), "scalar".to_string()];
3876        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site(), false).unwrap();
3877        let scalar_count = tiers.iter().filter(|t| t.name == "scalar").count();
3878        assert_eq!(scalar_count, 1, "scalar must not be duplicated");
3879    }
3880
3881    #[test]
3882    fn autoversion_tiers_sorted_by_priority() {
3883        let names = vec!["neon".to_string(), "v4".to_string(), "v3".to_string()];
3884        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site(), false).unwrap();
3885        // v4 (priority 40) > v3 (30) > neon (20) > scalar (0)
3886        let priorities: Vec<u32> = tiers.iter().map(|t| t.priority).collect();
3887        for window in priorities.windows(2) {
3888            assert!(
3889                    window[0] >= window[1],
3890            cfg_feature: None,
3891                    "Tiers not sorted by priority: {:?}",
3892                    priorities
3893                );
3894        }
3895    }
3896
3897    #[test]
3898    fn autoversion_unknown_tier_errors() {
3899        let names = vec!["v3".to_string(), "avx9000".to_string()];
3900        let result = resolve_tiers(&names, proc_macro2::Span::call_site(), false);
3901        match result {
3902            Ok(_) => panic!("Expected error for unknown tier 'avx9000'"),
3903            Err(e) => {
3904                let err_msg = e.to_string();
3905                assert!(
3906                    err_msg.contains("avx9000"),
3907                    "Error should mention unknown tier: {}",
3908                    err_msg
3909                );
3910            }
3911        }
3912    }
3913
3914    #[test]
3915    fn autoversion_all_known_tiers_resolve() {
3916        // Every tier in ALL_TIERS should be findable
3917        for tier in ALL_TIERS {
3918            assert!(
3919                find_tier(tier.name).is_some(),
3920                "Tier '{}' should be findable by name",
3921                tier.name
3922            );
3923        }
3924    }
3925
3926    #[test]
3927    fn autoversion_default_tier_list_is_sensible() {
3928        // Defaults should cover x86, ARM, WASM, and scalar
3929        let names: Vec<String> = DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect();
3930        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site(), false).unwrap();
3931
3932        let has_x86 = tiers.iter().any(|t| t.target_arch == Some("x86_64"));
3933        let has_arm = tiers.iter().any(|t| t.target_arch == Some("aarch64"));
3934        let has_wasm = tiers.iter().any(|t| t.target_arch == Some("wasm32"));
3935        let has_scalar = tiers.iter().any(|t| t.name == "scalar");
3936
3937        assert!(has_x86, "Default tiers should include an x86_64 tier");
3938        assert!(has_arm, "Default tiers should include an aarch64 tier");
3939        assert!(has_wasm, "Default tiers should include a wasm32 tier");
3940        assert!(has_scalar, "Default tiers should include scalar");
3941    }
3942
3943    // =========================================================================
3944    // autoversion — variant replacement (AST manipulation)
3945    // =========================================================================
3946
3947    /// Mirrors what `autoversion_impl` does for a single variant: parse an
3948    /// ItemFn (for test convenience), rename it, swap the SimdToken param
3949    /// type, optionally inject the `_self` preamble for scalar+self.
3950    fn do_variant_replacement(func: &str, tier_name: &str, has_self: bool) -> ItemFn {
3951        let mut f: ItemFn = syn::parse_str(func).unwrap();
3952        let fn_name = f.sig.ident.to_string();
3953
3954        let tier = find_tier(tier_name).unwrap();
3955
3956        // Rename
3957        f.sig.ident = format_ident!("{}_{}", fn_name, tier.suffix);
3958
3959        // Find and replace SimdToken param type
3960        let token_idx = find_simd_token_param(&f.sig)
3961            .unwrap_or_else(|| panic!("No SimdToken param in: {}", func))
3962            .index;
3963        let concrete_type: Type = syn::parse_str(tier.token_path).unwrap();
3964        if let FnArg::Typed(pt) = &mut f.sig.inputs[token_idx] {
3965            *pt.ty = concrete_type;
3966        }
3967
3968        // Scalar + self: inject preamble
3969        if tier_name == "scalar" && has_self {
3970            let preamble: syn::Stmt = syn::parse_quote!(let _self = self;);
3971            f.block.stmts.insert(0, preamble);
3972        }
3973
3974        f
3975    }
3976
3977    #[test]
3978    fn variant_replacement_v3_renames_function() {
3979        let f = do_variant_replacement(
3980            "fn process(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3981            "v3",
3982            false,
3983        );
3984        assert_eq!(f.sig.ident, "process_v3");
3985    }
3986
3987    #[test]
3988    fn variant_replacement_v3_replaces_token_type() {
3989        let f = do_variant_replacement(
3990            "fn process(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3991            "v3",
3992            false,
3993        );
3994        let first_param_ty = match &f.sig.inputs[0] {
3995            FnArg::Typed(pt) => pt.ty.to_token_stream().to_string(),
3996            _ => panic!("Expected typed param"),
3997        };
3998        assert!(
3999            first_param_ty.contains("X64V3Token"),
4000            "Expected X64V3Token, got: {}",
4001            first_param_ty
4002        );
4003    }
4004
4005    #[test]
4006    fn variant_replacement_neon_produces_valid_fn() {
4007        let f = do_variant_replacement(
4008            "fn compute(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
4009            "neon",
4010            false,
4011        );
4012        assert_eq!(f.sig.ident, "compute_neon");
4013        let first_param_ty = match &f.sig.inputs[0] {
4014            FnArg::Typed(pt) => pt.ty.to_token_stream().to_string(),
4015            _ => panic!("Expected typed param"),
4016        };
4017        assert!(
4018            first_param_ty.contains("NeonToken"),
4019            "Expected NeonToken, got: {}",
4020            first_param_ty
4021        );
4022    }
4023
4024    #[test]
4025    fn variant_replacement_wasm128_produces_valid_fn() {
4026        let f = do_variant_replacement(
4027            "fn compute(_t: SimdToken, data: &[f32]) -> f32 { 0.0 }",
4028            "wasm128",
4029            false,
4030        );
4031        assert_eq!(f.sig.ident, "compute_wasm128");
4032    }
4033
4034    #[test]
4035    fn variant_replacement_scalar_produces_valid_fn() {
4036        let f = do_variant_replacement(
4037            "fn compute(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
4038            "scalar",
4039            false,
4040        );
4041        assert_eq!(f.sig.ident, "compute_scalar");
4042        let first_param_ty = match &f.sig.inputs[0] {
4043            FnArg::Typed(pt) => pt.ty.to_token_stream().to_string(),
4044            _ => panic!("Expected typed param"),
4045        };
4046        assert!(
4047            first_param_ty.contains("ScalarToken"),
4048            "Expected ScalarToken, got: {}",
4049            first_param_ty
4050        );
4051    }
4052
4053    #[test]
4054    fn variant_replacement_v4_produces_valid_fn() {
4055        let f = do_variant_replacement(
4056            "fn transform(token: SimdToken, data: &mut [f32]) { }",
4057            "v4",
4058            false,
4059        );
4060        assert_eq!(f.sig.ident, "transform_v4");
4061        let first_param_ty = match &f.sig.inputs[0] {
4062            FnArg::Typed(pt) => pt.ty.to_token_stream().to_string(),
4063            _ => panic!("Expected typed param"),
4064        };
4065        assert!(
4066            first_param_ty.contains("X64V4Token"),
4067            "Expected X64V4Token, got: {}",
4068            first_param_ty
4069        );
4070    }
4071
4072    #[test]
4073    fn variant_replacement_v4x_produces_valid_fn() {
4074        let f = do_variant_replacement(
4075            "fn transform(token: SimdToken, data: &mut [f32]) { }",
4076            "v4x",
4077            false,
4078        );
4079        assert_eq!(f.sig.ident, "transform_v4x");
4080    }
4081
4082    #[test]
4083    fn variant_replacement_arm_v2_produces_valid_fn() {
4084        let f = do_variant_replacement(
4085            "fn transform(token: SimdToken, data: &mut [f32]) { }",
4086            "arm_v2",
4087            false,
4088        );
4089        assert_eq!(f.sig.ident, "transform_arm_v2");
4090    }
4091
4092    #[test]
4093    fn variant_replacement_preserves_generics() {
4094        let f = do_variant_replacement(
4095            "fn process<T: Copy + Default>(token: SimdToken, data: &[T]) -> T { T::default() }",
4096            "v3",
4097            false,
4098        );
4099        assert_eq!(f.sig.ident, "process_v3");
4100        // Generic params should still be present
4101        assert!(
4102            !f.sig.generics.params.is_empty(),
4103            "Generics should be preserved"
4104        );
4105    }
4106
4107    #[test]
4108    fn variant_replacement_preserves_where_clause() {
4109        let f = do_variant_replacement(
4110            "fn process<T>(token: SimdToken, data: &[T]) -> T where T: Copy + Default { T::default() }",
4111            "v3",
4112            false,
4113        );
4114        assert!(
4115            f.sig.generics.where_clause.is_some(),
4116            "Where clause should be preserved"
4117        );
4118    }
4119
4120    #[test]
4121    fn variant_replacement_preserves_return_type() {
4122        let f = do_variant_replacement(
4123            "fn process(token: SimdToken, data: &[f32]) -> Vec<f32> { vec![] }",
4124            "neon",
4125            false,
4126        );
4127        let ret = f.sig.output.to_token_stream().to_string();
4128        assert!(
4129            ret.contains("Vec"),
4130            "Return type should be preserved, got: {}",
4131            ret
4132        );
4133    }
4134
4135    #[test]
4136    fn variant_replacement_preserves_multiple_params() {
4137        let f = do_variant_replacement(
4138            "fn process(token: SimdToken, a: &[f32], b: &[f32], scale: f32) -> f32 { 0.0 }",
4139            "v3",
4140            false,
4141        );
4142        // SimdToken → X64V3Token, plus the 3 other params
4143        assert_eq!(f.sig.inputs.len(), 4);
4144    }
4145
4146    #[test]
4147    fn variant_replacement_preserves_no_return_type() {
4148        let f = do_variant_replacement(
4149            "fn transform(token: SimdToken, data: &mut [f32]) { }",
4150            "v3",
4151            false,
4152        );
4153        assert!(
4154            matches!(f.sig.output, ReturnType::Default),
4155            "No return type should remain as Default"
4156        );
4157    }
4158
4159    #[test]
4160    fn variant_replacement_preserves_lifetime_params() {
4161        let f = do_variant_replacement(
4162            "fn process<'a>(token: SimdToken, data: &'a [f32]) -> &'a [f32] { data }",
4163            "v3",
4164            false,
4165        );
4166        assert!(!f.sig.generics.params.is_empty());
4167    }
4168
4169    #[test]
4170    fn variant_replacement_scalar_self_injects_preamble() {
4171        let f = do_variant_replacement(
4172            "fn method(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
4173            "scalar",
4174            true, // has_self
4175        );
4176        assert_eq!(f.sig.ident, "method_scalar");
4177
4178        // First statement should be `let _self = self;`
4179        let body_str = f.block.to_token_stream().to_string();
4180        assert!(
4181            body_str.contains("let _self = self"),
4182            "Scalar+self variant should have _self preamble, got: {}",
4183            body_str
4184        );
4185    }
4186
4187    #[test]
4188    fn variant_replacement_all_default_tiers_produce_valid_fns() {
4189        let names: Vec<String> = DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect();
4190        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site(), false).unwrap();
4191
4192        for tier in &tiers {
4193            let f = do_variant_replacement(
4194                "fn process(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
4195                tier.name,
4196                false,
4197            );
4198            let expected_name = format!("process_{}", tier.suffix);
4199            assert_eq!(
4200                f.sig.ident.to_string(),
4201                expected_name,
4202                "Tier '{}' should produce function '{}'",
4203                tier.name,
4204                expected_name
4205            );
4206        }
4207    }
4208
4209    #[test]
4210    fn variant_replacement_all_known_tiers_produce_valid_fns() {
4211        for tier in ALL_TIERS {
4212            let f = do_variant_replacement(
4213                "fn compute(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
4214                tier.name,
4215                false,
4216            );
4217            let expected_name = format!("compute_{}", tier.suffix);
4218            assert_eq!(
4219                f.sig.ident.to_string(),
4220                expected_name,
4221                "Tier '{}' should produce function '{}'",
4222                tier.name,
4223                expected_name
4224            );
4225        }
4226    }
4227
4228    #[test]
4229    fn variant_replacement_no_simdtoken_remains() {
4230        for tier in ALL_TIERS {
4231            let f = do_variant_replacement(
4232                "fn compute(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
4233                tier.name,
4234                false,
4235            );
4236            let full_str = f.to_token_stream().to_string();
4237            assert!(
4238                !full_str.contains("SimdToken"),
4239                "Tier '{}' variant still contains 'SimdToken': {}",
4240                tier.name,
4241                full_str
4242            );
4243        }
4244    }
4245
4246    // =========================================================================
4247    // autoversion — cfg guard and tier descriptor properties
4248    // =========================================================================
4249
4250    #[test]
4251    fn tier_v3_targets_x86_64() {
4252        let tier = find_tier("v3").unwrap();
4253        assert_eq!(tier.target_arch, Some("x86_64"));
4254    }
4255
4256    #[test]
4257    fn tier_v4_targets_x86_64() {
4258        let tier = find_tier("v4").unwrap();
4259        assert_eq!(tier.target_arch, Some("x86_64"));
4260    }
4261
4262    #[test]
4263    fn tier_v4x_targets_x86_64() {
4264        let tier = find_tier("v4x").unwrap();
4265        assert_eq!(tier.target_arch, Some("x86_64"));
4266    }
4267
4268    #[test]
4269    fn tier_neon_targets_aarch64() {
4270        let tier = find_tier("neon").unwrap();
4271        assert_eq!(tier.target_arch, Some("aarch64"));
4272    }
4273
4274    #[test]
4275    fn tier_wasm128_targets_wasm32() {
4276        let tier = find_tier("wasm128").unwrap();
4277        assert_eq!(tier.target_arch, Some("wasm32"));
4278    }
4279
4280    #[test]
4281    fn tier_scalar_has_no_guards() {
4282        let tier = find_tier("scalar").unwrap();
4283        assert_eq!(tier.target_arch, None);
4284        assert_eq!(tier.priority, 0);
4285    }
4286
4287    #[test]
4288    fn tier_priorities_are_consistent() {
4289        // Higher-capability tiers within the same arch should have higher priority
4290        let v2 = find_tier("v2").unwrap();
4291        let v3 = find_tier("v3").unwrap();
4292        let v4 = find_tier("v4").unwrap();
4293        assert!(v4.priority > v3.priority);
4294        assert!(v3.priority > v2.priority);
4295
4296        let neon = find_tier("neon").unwrap();
4297        let arm_v2 = find_tier("arm_v2").unwrap();
4298        let arm_v3 = find_tier("arm_v3").unwrap();
4299        assert!(arm_v3.priority > arm_v2.priority);
4300        assert!(arm_v2.priority > neon.priority);
4301
4302        // scalar is lowest
4303        let scalar = find_tier("scalar").unwrap();
4304        assert!(neon.priority > scalar.priority);
4305        assert!(v2.priority > scalar.priority);
4306    }
4307
4308    // =========================================================================
4309    // autoversion — dispatcher structure
4310    // =========================================================================
4311
4312    #[test]
4313    fn dispatcher_param_removal_free_fn() {
4314        // Simulate what autoversion_impl does: remove the SimdToken param
4315        let f: ItemFn =
4316            syn::parse_str("fn process(token: SimdToken, data: &[f32], scale: f32) -> f32 { 0.0 }")
4317                .unwrap();
4318
4319        let token_param = find_simd_token_param(&f.sig).unwrap();
4320        let mut dispatcher_inputs: Vec<FnArg> = f.sig.inputs.iter().cloned().collect();
4321        dispatcher_inputs.remove(token_param.index);
4322
4323        // Should have 2 params remaining: data, scale
4324        assert_eq!(dispatcher_inputs.len(), 2);
4325
4326        // Neither should be SimdToken
4327        for arg in &dispatcher_inputs {
4328            if let FnArg::Typed(pt) = arg {
4329                let ty_str = pt.ty.to_token_stream().to_string();
4330                assert!(
4331                    !ty_str.contains("SimdToken"),
4332                    "SimdToken should be removed from dispatcher, found: {}",
4333                    ty_str
4334                );
4335            }
4336        }
4337    }
4338
4339    #[test]
4340    fn dispatcher_param_removal_token_only() {
4341        let f: ItemFn = syn::parse_str("fn process(token: SimdToken) -> f32 { 0.0 }").unwrap();
4342
4343        let token_param = find_simd_token_param(&f.sig).unwrap();
4344        let mut dispatcher_inputs: Vec<FnArg> = f.sig.inputs.iter().cloned().collect();
4345        dispatcher_inputs.remove(token_param.index);
4346
4347        // No params left — dispatcher takes no arguments
4348        assert_eq!(dispatcher_inputs.len(), 0);
4349    }
4350
4351    #[test]
4352    fn dispatcher_param_removal_token_last() {
4353        let f: ItemFn =
4354            syn::parse_str("fn process(data: &[f32], scale: f32, token: SimdToken) -> f32 { 0.0 }")
4355                .unwrap();
4356
4357        let token_param = find_simd_token_param(&f.sig).unwrap();
4358        assert_eq!(token_param.index, 2);
4359
4360        let mut dispatcher_inputs: Vec<FnArg> = f.sig.inputs.iter().cloned().collect();
4361        dispatcher_inputs.remove(token_param.index);
4362
4363        assert_eq!(dispatcher_inputs.len(), 2);
4364    }
4365
4366    #[test]
4367    fn dispatcher_dispatch_args_extraction() {
4368        // Test that we correctly extract idents for the dispatch call
4369        let f: ItemFn =
4370            syn::parse_str("fn process(data: &[f32], scale: f32) -> f32 { 0.0 }").unwrap();
4371
4372        let dispatch_args: Vec<String> = f
4373            .sig
4374            .inputs
4375            .iter()
4376            .filter_map(|arg| {
4377                if let FnArg::Typed(PatType { pat, .. }) = arg {
4378                    if let syn::Pat::Ident(pi) = pat.as_ref() {
4379                        return Some(pi.ident.to_string());
4380                    }
4381                }
4382                None
4383            })
4384            .collect();
4385
4386        assert_eq!(dispatch_args, vec!["data", "scale"]);
4387    }
4388
4389    #[test]
4390    fn dispatcher_wildcard_params_get_renamed() {
4391        let f: ItemFn = syn::parse_str("fn process(_: &[f32], _: f32) -> f32 { 0.0 }").unwrap();
4392
4393        let mut dispatcher_inputs: Vec<FnArg> = f.sig.inputs.iter().cloned().collect();
4394
4395        let mut wild_counter = 0u32;
4396        for arg in &mut dispatcher_inputs {
4397            if let FnArg::Typed(pat_type) = arg {
4398                if matches!(pat_type.pat.as_ref(), syn::Pat::Wild(_)) {
4399                    let ident = format_ident!("__autoversion_wild_{}", wild_counter);
4400                    wild_counter += 1;
4401                    *pat_type.pat = syn::Pat::Ident(syn::PatIdent {
4402                        attrs: vec![],
4403                        by_ref: None,
4404                        mutability: None,
4405                        ident,
4406                        subpat: None,
4407                    });
4408                }
4409            }
4410        }
4411
4412        // Both wildcards should be renamed
4413        assert_eq!(wild_counter, 2);
4414
4415        let names: Vec<String> = dispatcher_inputs
4416            .iter()
4417            .filter_map(|arg| {
4418                if let FnArg::Typed(PatType { pat, .. }) = arg {
4419                    if let syn::Pat::Ident(pi) = pat.as_ref() {
4420                        return Some(pi.ident.to_string());
4421                    }
4422                }
4423                None
4424            })
4425            .collect();
4426
4427        assert_eq!(names, vec!["__autoversion_wild_0", "__autoversion_wild_1"]);
4428    }
4429
4430    // =========================================================================
4431    // autoversion — suffix_path (reused in dispatch)
4432    // =========================================================================
4433
4434    #[test]
4435    fn suffix_path_simple() {
4436        let path: syn::Path = syn::parse_str("process").unwrap();
4437        let suffixed = suffix_path(&path, "v3");
4438        assert_eq!(suffixed.to_token_stream().to_string(), "process_v3");
4439    }
4440
4441    #[test]
4442    fn suffix_path_qualified() {
4443        let path: syn::Path = syn::parse_str("module::process").unwrap();
4444        let suffixed = suffix_path(&path, "neon");
4445        let s = suffixed.to_token_stream().to_string();
4446        assert!(
4447            s.contains("process_neon"),
4448            "Expected process_neon, got: {}",
4449            s
4450        );
4451    }
4452}