// archmage_macros — lib.rs
1//! Proc-macros for archmage SIMD capability tokens.
2//!
//! Provides `#[arcane]` attribute (with `#[rite]` alias) to make raw intrinsics
4//! safe via token proof.
5
6use proc_macro::TokenStream;
7use quote::{ToTokens, format_ident, quote, quote_spanned};
8use syn::{
9    Attribute, FnArg, GenericParam, Ident, PatType, Signature, Token, Type, TypeParamBound,
10    parse::{Parse, ParseStream},
11    parse_macro_input, parse_quote, token,
12};
13
/// A function parsed with the body left as an opaque TokenStream.
///
/// Only the signature is fully parsed into an AST — the body tokens are collected
/// without building any AST nodes (no expressions, statements, or patterns parsed).
/// This saves ~2ms per function invocation at 100 lines of code.
#[derive(Clone)]
struct LightFn {
    // Outer attributes (`#[...]`) written before the function.
    attrs: Vec<Attribute>,
    // Visibility qualifier (`pub`, `pub(crate)`, or inherited).
    vis: syn::Visibility,
    // Fully parsed signature: name, generics, inputs, output.
    sig: Signature,
    // Brace pair delimiting the body; kept so re-emission preserves spans.
    brace_token: token::Brace,
    // Raw body tokens — deliberately never parsed into an AST.
    body: proc_macro2::TokenStream,
}
27
28impl Parse for LightFn {
29    fn parse(input: ParseStream) -> syn::Result<Self> {
30        let attrs = input.call(Attribute::parse_outer)?;
31        let vis: syn::Visibility = input.parse()?;
32        let sig: Signature = input.parse()?;
33        let content;
34        let brace_token = syn::braced!(content in input);
35        let body: proc_macro2::TokenStream = content.parse()?;
36        Ok(LightFn {
37            attrs,
38            vis,
39            sig,
40            brace_token,
41            body,
42        })
43    }
44}
45
46impl ToTokens for LightFn {
47    fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
48        for attr in &self.attrs {
49            attr.to_tokens(tokens);
50        }
51        self.vis.to_tokens(tokens);
52        self.sig.to_tokens(tokens);
53        self.brace_token.surround(tokens, |tokens| {
54            self.body.to_tokens(tokens);
55        });
56    }
57}
58
59/// Filter out `#[inline]`, `#[inline(always)]`, `#[inline(never)]` from attributes.
60///
61/// Used to prevent duplicate inline attributes when the macro adds its own.
62/// Duplicate `#[inline]` is a warning that will become a hard error.
63fn filter_inline_attrs(attrs: &[Attribute]) -> Vec<&Attribute> {
64    attrs
65        .iter()
66        .filter(|attr| !attr.path().is_ident("inline"))
67        .collect()
68}
69
70/// Check if an attribute is a lint-control attribute.
71///
72/// Lint-control attributes (`#[allow(...)]`, `#[expect(...)]`, `#[deny(...)]`,
73/// `#[warn(...)]`, `#[forbid(...)]`) must be propagated to generated sibling
74/// functions so that user-applied lint suppressions work on the generated code.
75fn is_lint_attr(attr: &Attribute) -> bool {
76    let path = attr.path();
77    path.is_ident("allow")
78        || path.is_ident("expect")
79        || path.is_ident("deny")
80        || path.is_ident("warn")
81        || path.is_ident("forbid")
82}
83
84/// Extract lint-control attributes from a list of attributes.
85///
86/// Returns references to `#[allow(...)]`, `#[expect(...)]`, `#[deny(...)]`,
87/// `#[warn(...)]`, and `#[forbid(...)]` attributes. These need to be propagated
88/// to generated sibling functions so clippy/rustc lint suppressions work.
89fn filter_lint_attrs(attrs: &[Attribute]) -> Vec<&Attribute> {
90    attrs.iter().filter(|attr| is_lint_attr(attr)).collect()
91}
92
93/// Generate a cfg guard combining target_arch and an optional feature gate.
94///
95/// - `(Some("x86_64"), None)` → `#[cfg(target_arch = "x86_64")]`
96/// - `(Some("x86_64"), Some("avx512"))` → `#[cfg(all(target_arch = "x86_64", feature = "avx512"))]`
97/// - `(None, Some("avx512"))` → `#[cfg(feature = "avx512")]`
98/// - `(None, None)` → empty
99fn gen_cfg_guard(target_arch: Option<&str>, cfg_feature: Option<&str>) -> proc_macro2::TokenStream {
100    match (target_arch, cfg_feature) {
101        (Some(arch), Some(feat)) => {
102            quote! { #[cfg(all(target_arch = #arch, feature = #feat))] }
103        }
104        (Some(arch), None) => quote! { #[cfg(target_arch = #arch)] },
105        (None, Some(feat)) => quote! { #[cfg(feature = #feat)] },
106        (None, None) => quote! {},
107    }
108}
109
110/// Build a turbofish token stream from a function's generics.
111///
112/// Collects type and const generic parameters (skipping lifetimes) and returns
113/// a `::<A, B, N, M>` turbofish fragment. Returns empty tokens if there are no
114/// type/const generics to forward.
115///
116/// This is needed when the dispatcher or wrapper calls variant/sibling functions
117/// that have const generics not inferable from argument types alone.
118fn build_turbofish(generics: &syn::Generics) -> proc_macro2::TokenStream {
119    let params: Vec<proc_macro2::TokenStream> = generics
120        .params
121        .iter()
122        .filter_map(|param| match param {
123            GenericParam::Type(tp) => {
124                let ident = &tp.ident;
125                Some(quote! { #ident })
126            }
127            GenericParam::Const(cp) => {
128                let ident = &cp.ident;
129                Some(quote! { #ident })
130            }
131            GenericParam::Lifetime(_) => None,
132        })
133        .collect();
134    if params.is_empty() {
135        quote! {}
136    } else {
137        quote! { ::<#(#params),*> }
138    }
139}
140
141/// Replace all `Self` identifier tokens with a concrete type in a token stream.
142///
143/// Recurses into groups (braces, parens, brackets). Used for `#[arcane(_self = Type)]`
144/// to replace `Self` in both the return type and body without needing to parse the body.
145fn replace_self_in_tokens(
146    tokens: proc_macro2::TokenStream,
147    replacement: &Type,
148) -> proc_macro2::TokenStream {
149    let mut result = proc_macro2::TokenStream::new();
150    for tt in tokens {
151        match tt {
152            proc_macro2::TokenTree::Ident(ref ident) if ident == "Self" => {
153                result.extend(replacement.to_token_stream());
154            }
155            proc_macro2::TokenTree::Group(group) => {
156                let new_stream = replace_self_in_tokens(group.stream(), replacement);
157                let mut new_group = proc_macro2::Group::new(group.delimiter(), new_stream);
158                new_group.set_span(group.span());
159                result.extend(std::iter::once(proc_macro2::TokenTree::Group(new_group)));
160            }
161            other => {
162                result.extend(std::iter::once(other));
163            }
164        }
165    }
166    result
167}
168
/// Arguments to the `#[arcane]` macro.
///
/// Parsed from the attribute argument list, e.g.
/// `#[arcane(nested, import_intrinsics, cfg(avx512), _self = MyType)]`.
/// All flags default to off/`None` via `Default`.
#[derive(Default)]
struct ArcaneArgs {
    /// Use `#[inline(always)]` instead of `#[inline]` for the inner function.
    /// Requires nightly Rust with `#![feature(target_feature_inline_always)]`.
    inline_always: bool,
    /// The concrete type to use for `self` receiver.
    /// When specified, `self`/`&self`/`&mut self` is transformed to `_self: Type`/`&Type`/`&mut Type`.
    /// Implies `nested = true`.
    self_type: Option<Type>,
    /// Generate an `unreachable!()` stub on the wrong architecture.
    /// Default is false (cfg-out: no function emitted on wrong arch).
    stub: bool,
    /// Use nested inner function instead of sibling function.
    /// Implied by `_self = Type`. Required for associated functions in impl blocks
    /// that have no `self` receiver (the macro can't distinguish them from free functions).
    nested: bool,
    /// Inject `use archmage::intrinsics::{arch}::*;` (includes safe memory ops).
    import_intrinsics: bool,
    /// Inject `use magetypes::simd::{ns}::*;`, `use magetypes::simd::generic::*;`,
    /// and `use magetypes::simd::backends::*;`.
    import_magetypes: bool,
    /// Additional cargo feature gate. When set, the generated `#[cfg(target_arch)]`
    /// becomes `#[cfg(all(target_arch = "...", feature = "..."))]`.
    /// Example: `#[arcane(cfg(avx512))]` → `#[cfg(all(target_arch = "x86_64", feature = "avx512"))]`
    cfg_feature: Option<String>,
}
196
197impl Parse for ArcaneArgs {
198    fn parse(input: ParseStream) -> syn::Result<Self> {
199        let mut args = ArcaneArgs::default();
200
201        while !input.is_empty() {
202            let ident: Ident = input.parse()?;
203            match ident.to_string().as_str() {
204                "inline_always" => args.inline_always = true,
205                "stub" => args.stub = true,
206                "nested" => args.nested = true,
207                "import_intrinsics" => args.import_intrinsics = true,
208                "import_magetypes" => args.import_magetypes = true,
209                "cfg" => {
210                    let content;
211                    syn::parenthesized!(content in input);
212                    let feat: Ident = content.parse()?;
213                    args.cfg_feature = Some(feat.to_string());
214                }
215                "_self" => {
216                    let _: Token![=] = input.parse()?;
217                    args.self_type = Some(input.parse()?);
218                }
219                other => {
220                    return Err(syn::Error::new(
221                        ident.span(),
222                        format!("unknown arcane argument: `{}`", other),
223                    ));
224                }
225            }
226            // Consume optional comma
227            if input.peek(Token![,]) {
228                let _: Token![,] = input.parse()?;
229            }
230        }
231
232        // _self = Type implies nested (inner fn needed for Self replacement)
233        if args.self_type.is_some() {
234            args.nested = true;
235        }
236
237        Ok(args)
238    }
239}
240
241// Token-to-features and trait-to-features mappings are generated from
242// token-registry.toml by xtask. Regenerate with: cargo run -p xtask -- generate
243mod generated;
244use generated::{
245    canonical_token_to_tier_suffix, tier_to_canonical_token, token_to_arch, token_to_features,
246    token_to_magetypes_namespace, trait_to_arch, trait_to_features, trait_to_magetypes_namespace,
247};
248
/// Result of extracting token info from a type.
///
/// Produced by `extract_token_type_info`; distinguishes how the token parameter
/// was spelled so feature resolution can use the right lookup table.
enum TokenTypeInfo {
    /// Concrete token type (e.g., `Avx2Token`) — the name was found in the
    /// generated token-to-features table.
    Concrete(String),
    /// impl Trait with the trait names (e.g., `impl HasX64V2`)
    ImplTrait(Vec<String>),
    /// Generic type parameter name (e.g., `T`) — bounds resolved separately
    /// via `find_generic_bounds`.
    Generic(String),
}
258
259/// Extract token type information from a type.
260fn extract_token_type_info(ty: &Type) -> Option<TokenTypeInfo> {
261    match ty {
262        Type::Path(type_path) => {
263            // Get the last segment of the path (e.g., "Avx2Token" from "archmage::Avx2Token")
264            type_path.path.segments.last().map(|seg| {
265                let name = seg.ident.to_string();
266                // Check if it's a known concrete token type
267                if token_to_features(&name).is_some() {
268                    TokenTypeInfo::Concrete(name)
269                } else {
270                    // Might be a generic type parameter like `T`
271                    TokenTypeInfo::Generic(name)
272                }
273            })
274        }
275        Type::Reference(type_ref) => {
276            // Handle &Token or &mut Token
277            extract_token_type_info(&type_ref.elem)
278        }
279        Type::ImplTrait(impl_trait) => {
280            // Handle `impl HasX64V2` or `impl HasX64V2 + HasNeon`
281            let traits: Vec<String> = extract_trait_names_from_bounds(&impl_trait.bounds);
282            if traits.is_empty() {
283                None
284            } else {
285                Some(TokenTypeInfo::ImplTrait(traits))
286            }
287        }
288        _ => None,
289    }
290}
291
292/// Extract trait names from type param bounds.
293fn extract_trait_names_from_bounds(
294    bounds: &syn::punctuated::Punctuated<TypeParamBound, Token![+]>,
295) -> Vec<String> {
296    bounds
297        .iter()
298        .filter_map(|bound| {
299            if let TypeParamBound::Trait(trait_bound) = bound {
300                trait_bound
301                    .path
302                    .segments
303                    .last()
304                    .map(|seg| seg.ident.to_string())
305            } else {
306                None
307            }
308        })
309        .collect()
310}
311
312/// Look up a generic type parameter in the function's generics.
313fn find_generic_bounds(sig: &Signature, type_name: &str) -> Option<Vec<String>> {
314    // Check inline bounds first (e.g., `fn foo<T: HasX64V2>(token: T)`)
315    for param in &sig.generics.params {
316        if let GenericParam::Type(type_param) = param
317            && type_param.ident == type_name
318        {
319            let traits = extract_trait_names_from_bounds(&type_param.bounds);
320            if !traits.is_empty() {
321                return Some(traits);
322            }
323        }
324    }
325
326    // Check where clause (e.g., `fn foo<T>(token: T) where T: HasX64V2`)
327    if let Some(where_clause) = &sig.generics.where_clause {
328        for predicate in &where_clause.predicates {
329            if let syn::WherePredicate::Type(pred_type) = predicate
330                && let Type::Path(type_path) = &pred_type.bounded_ty
331                && let Some(seg) = type_path.path.segments.last()
332                && seg.ident == type_name
333            {
334                let traits = extract_trait_names_from_bounds(&pred_type.bounds);
335                if !traits.is_empty() {
336                    return Some(traits);
337                }
338            }
339        }
340    }
341
342    None
343}
344
345/// Convert trait names to features, collecting all features from all traits.
346fn traits_to_features(trait_names: &[String]) -> Option<Vec<&'static str>> {
347    let mut all_features = Vec::new();
348
349    for trait_name in trait_names {
350        if let Some(features) = trait_to_features(trait_name) {
351            for &feature in features {
352                if !all_features.contains(&feature) {
353                    all_features.push(feature);
354                }
355            }
356        }
357    }
358
359    if all_features.is_empty() {
360        None
361    } else {
362        Some(all_features)
363    }
364}
365
/// Trait names that don't map to any CPU features. These are valid in the type
/// system but cannot be used as token bounds in `#[arcane]`/`#[rite]` because
/// the macros need concrete features to generate `#[target_feature]` attributes.
const FEATURELESS_TRAIT_NAMES: &[&str] = &["SimdToken", "IntoConcreteToken"];

/// Check if any trait names are featureless (no CPU feature mapping).
///
/// Returns the first featureless trait name found, scanning `trait_names` in
/// order; `None` when every name maps to at least one CPU feature.
fn find_featureless_trait(trait_names: &[String]) -> Option<&'static str> {
    // Iterator form of the original hand-rolled nested loops: for each input
    // name, look it up in the featureless list and yield the first hit.
    trait_names.iter().find_map(|name| {
        FEATURELESS_TRAIT_NAMES
            .iter()
            .find(|&&featureless| name == featureless)
            .copied()
    })
}
383
/// Diagnose why `find_token_param` failed. Returns the name of a featureless
/// trait if the signature has a parameter bounded by one (e.g., `SimdToken`).
///
/// Only used on the error path to produce a targeted diagnostic; returns
/// `None` when no featureless bound is found (callers then emit the generic
/// "requires a token parameter" error).
fn diagnose_featureless_token(sig: &Signature) -> Option<&'static str> {
    for arg in &sig.inputs {
        if let FnArg::Typed(PatType { ty, .. }) = arg
            && let Some(info) = extract_token_type_info(ty)
        {
            match &info {
                TokenTypeInfo::ImplTrait(names) => {
                    // `token: impl SimdToken + ...`
                    if let Some(name) = find_featureless_trait(names) {
                        return Some(name);
                    }
                }
                TokenTypeInfo::Generic(type_name) => {
                    // Check if the type name itself is a featureless trait
                    // (e.g., `token: SimdToken` used as a bare path)
                    let as_vec = vec![type_name.clone()];
                    if let Some(name) = find_featureless_trait(&as_vec) {
                        return Some(name);
                    }
                    // Check generic bounds (e.g., `T: SimdToken`)
                    if let Some(bounds) = find_generic_bounds(sig, type_name)
                        && let Some(name) = find_featureless_trait(&bounds)
                    {
                        return Some(name);
                    }
                }
                // Concrete tokens always have features; nothing to diagnose.
                TokenTypeInfo::Concrete(_) => {}
            }
        }
    }
    None
}
417
/// Result of finding a token parameter in a function signature.
///
/// Bundles everything the expansion strategies need: the parameter name to
/// forward, the features to enable, and optional arch/namespace metadata.
struct TokenParamInfo {
    /// The parameter identifier (e.g., `token`)
    ident: Ident,
    /// Target features to enable (e.g., `["avx2", "fma"]`)
    features: Vec<&'static str>,
    /// Target architecture (Some for concrete tokens, None for traits/generics)
    target_arch: Option<&'static str>,
    /// Concrete token type name (Some for concrete tokens, None for traits/generics)
    token_type_name: Option<String>,
    /// Magetypes width namespace (e.g., "v3", "neon", "wasm128")
    magetypes_namespace: Option<&'static str>,
}
431
432/// Resolve magetypes namespace from a list of trait names.
433/// Returns the first matching namespace found.
434fn traits_to_magetypes_namespace(trait_names: &[String]) -> Option<&'static str> {
435    for name in trait_names {
436        if let Some(ns) = trait_to_magetypes_namespace(name) {
437            return Some(ns);
438        }
439    }
440    None
441}
442
443/// Given trait bound names, return the first matching target architecture.
444fn traits_to_arch(trait_names: &[String]) -> Option<&'static str> {
445    for name in trait_names {
446        if let Some(arch) = trait_to_arch(name) {
447            return Some(arch);
448        }
449    }
450    None
451}
452
/// Find the first token parameter in a function signature.
///
/// Scans `sig.inputs` left to right and returns info for the first typed
/// parameter whose type resolves to a non-empty CPU-feature list (concrete
/// token, `impl Trait` bound, or bounded generic). Returns `None` when no
/// parameter resolves — callers then emit a diagnostic.
fn find_token_param(sig: &Signature) -> Option<TokenParamInfo> {
    for arg in &sig.inputs {
        match arg {
            FnArg::Receiver(_) => {
                // Self receivers (self, &self, &mut self) are not yet supported.
                // The macro creates an inner function, and Rust's inner functions
                // cannot have `self` parameters. Supporting this would require
                // AST rewriting to replace `self` with a regular parameter.
                // See the module docs for the workaround.
                continue;
            }
            FnArg::Typed(PatType { pat, ty, .. }) => {
                if let Some(info) = extract_token_type_info(ty) {
                    // Resolve (features, arch, concrete-name, magetypes namespace)
                    // through the lookup table matching how the type was spelled.
                    let (features, arch, token_name, mage_ns) = match info {
                        TokenTypeInfo::Concrete(ref name) => {
                            let features = token_to_features(name).map(|f| f.to_vec());
                            let arch = token_to_arch(name);
                            let ns = token_to_magetypes_namespace(name);
                            (features, arch, Some(name.clone()), ns)
                        }
                        TokenTypeInfo::ImplTrait(ref trait_names) => {
                            let ns = traits_to_magetypes_namespace(trait_names);
                            let arch = traits_to_arch(trait_names);
                            (traits_to_features(trait_names), arch, None, ns)
                        }
                        TokenTypeInfo::Generic(type_name) => {
                            // Look up the generic parameter's bounds
                            let bounds = find_generic_bounds(sig, &type_name);
                            let features = bounds.as_ref().and_then(|t| traits_to_features(t));
                            let ns = bounds
                                .as_ref()
                                .and_then(|t| traits_to_magetypes_namespace(t));
                            let arch = bounds.as_ref().and_then(|t| traits_to_arch(t));
                            (features, arch, None, ns)
                        }
                    };

                    if let Some(features) = features {
                        // Extract parameter name (or synthesize one for wildcard `_`)
                        let ident = match pat.as_ref() {
                            syn::Pat::Ident(pat_ident) => Some(pat_ident.ident.clone()),
                            syn::Pat::Wild(w) => {
                                Some(Ident::new("__archmage_token", w.underscore_token.span))
                            }
                            // Other patterns (tuple/struct destructuring) can't be
                            // forwarded by name — keep scanning remaining params.
                            _ => None,
                        };
                        if let Some(ident) = ident {
                            return Some(TokenParamInfo {
                                ident,
                                features,
                                target_arch: arch,
                                token_type_name: token_name,
                                magetypes_namespace: mage_ns,
                            });
                        }
                    }
                }
            }
        }
    }
    None
}
516
/// Represents the kind of self receiver and the transformed parameter.
///
/// NOTE(review): no consumer is visible in this chunk — presumably used by the
/// `_self = Type` receiver rewriting further down the file; confirm before
/// removing or changing variants.
enum SelfReceiver {
    /// `self` (by value/move)
    Owned,
    /// `&self` (shared reference)
    Ref,
    /// `&mut self` (mutable reference)
    RefMut,
}
526
527/// Generate import statements to prepend to a function body.
528///
529/// Returns a `TokenStream` of `use` statements based on the import flags,
530/// target architecture, and magetypes namespace.
531fn generate_imports(
532    target_arch: Option<&str>,
533    magetypes_namespace: Option<&str>,
534    import_intrinsics: bool,
535    import_magetypes: bool,
536) -> proc_macro2::TokenStream {
537    let mut imports = proc_macro2::TokenStream::new();
538
539    if import_intrinsics && let Some(arch) = target_arch {
540        let arch_ident = format_ident!("{}", arch);
541        imports.extend(quote! {
542            #[allow(unused_imports)]
543            use archmage::intrinsics::#arch_ident::*;
544        });
545        // ScalarToken or unknown arch: import_intrinsics is a no-op
546    }
547
548    if import_magetypes && let Some(ns) = magetypes_namespace {
549        let ns_ident = format_ident!("{}", ns);
550        imports.extend(quote! {
551            #[allow(unused_imports)]
552            use magetypes::simd::#ns_ident::*;
553            #[allow(unused_imports)]
554            use magetypes::simd::backends::*;
555        });
556    }
557
558    imports
559}
560
/// Shared implementation for the `#[arcane]`-family attribute macros.
///
/// Pipeline: validate the receiver/args combination, resolve the token
/// parameter to concrete CPU features, optionally inject imports into the
/// body, build `#[target_feature]`/`#[inline]` attributes, then dispatch to
/// one of three expansion strategies (wasm-safe, nested, or sibling).
/// Errors are returned as `compile_error!` token streams spanned to the
/// function signature.
fn arcane_impl(mut input_fn: LightFn, macro_name: &str, args: ArcaneArgs) -> TokenStream {
    // Check for self receiver (a receiver can only appear as the first input).
    let has_self_receiver = input_fn
        .sig
        .inputs
        .first()
        .map(|arg| matches!(arg, FnArg::Receiver(_)))
        .unwrap_or(false);

    // Nested mode is required when _self = Type is used (for Self replacement in nested fn).
    // In sibling mode, self/Self work naturally since both fns live in the same impl scope.
    // However, if there's a self receiver in nested mode, we still need _self = Type.
    if has_self_receiver && args.nested && args.self_type.is_none() {
        let msg = format!(
            "{} with self receiver in nested mode requires `_self = Type` argument.\n\
             Example: #[{}(nested, _self = MyType)]\n\
             Use `_self` (not `self`) in the function body to refer to self.\n\
             \n\
             Alternatively, remove `nested` to use sibling expansion (default), \
             which handles self/Self naturally.",
            macro_name, macro_name
        );
        return syn::Error::new_spanned(&input_fn.sig, msg)
            .to_compile_error()
            .into();
    }

    // Find the token parameter, its features, target arch, and token type name
    let TokenParamInfo {
        ident: _token_ident,
        features,
        target_arch,
        token_type_name,
        magetypes_namespace,
    } = match find_token_param(&input_fn.sig) {
        Some(result) => result,
        None => {
            // Check for specific misuse: featureless traits like SimdToken
            if let Some(trait_name) = diagnose_featureless_token(&input_fn.sig) {
                let msg = format!(
                    "`{trait_name}` cannot be used as a token bound in #[{macro_name}] \
                     because it doesn't specify any CPU features.\n\
                     \n\
                     #[{macro_name}] needs concrete features to generate #[target_feature]. \
                     Use a concrete token or a feature trait:\n\
                     \n\
                     Concrete tokens: X64V3Token, Desktop64, NeonToken, Arm64V2Token, ...\n\
                     Feature traits:  impl HasX64V2, impl HasNeon, impl HasArm64V3, ..."
                );
                return syn::Error::new_spanned(&input_fn.sig, msg)
                    .to_compile_error()
                    .into();
            }
            // Generic fallback error listing the supported parameter forms.
            let msg = format!(
                "{} requires a token parameter. Supported forms:\n\
                 - Concrete: `token: X64V3Token`\n\
                 - impl Trait: `token: impl HasX64V2`\n\
                 - Generic: `fn foo<T: HasX64V2>(token: T, ...)`\n\
                 - With self: `#[{}(_self = Type)] fn method(&self, token: impl HasNeon, ...)`",
                macro_name, macro_name
            );
            return syn::Error::new_spanned(&input_fn.sig, msg)
                .to_compile_error()
                .into();
        }
    };

    // Check: import_intrinsics with AVX-512 features requires the avx512 cargo feature
    // on archmage (propagated to archmage-macros). Without it, 512-bit safe memory ops
    // from safe_unaligned_simd are not available, and _mm512_loadu_ps etc. would resolve
    // to the unsafe core::arch versions (taking raw pointers instead of references).
    //
    // We check the resolved features (not the token name) so this works uniformly for
    // concrete tokens (X64V4Token), trait bounds (impl HasX64V4), and generics (T: HasX64V4).
    #[cfg(not(feature = "avx512"))]
    if args.import_intrinsics && features.iter().any(|f| f.starts_with("avx512")) {
        let token_desc = token_type_name.as_deref().unwrap_or("an AVX-512 token");
        let msg = format!(
            "Using {token_desc} with `import_intrinsics` requires the `avx512` feature.\n\
             \n\
             Add to your Cargo.toml:\n\
             \x20 archmage = {{ version = \"...\", features = [\"avx512\"] }}\n\
             \n\
             Without it, 512-bit safe memory ops (_mm512_loadu_ps etc.) are not available.\n\
             If you only need value intrinsics (no memory ops), remove `import_intrinsics`."
        );
        return syn::Error::new_spanned(&input_fn.sig, msg)
            .to_compile_error()
            .into();
    }

    // Prepend import statements to body if requested
    let body_imports = generate_imports(
        target_arch,
        magetypes_namespace,
        args.import_intrinsics,
        args.import_magetypes,
    );
    if !body_imports.is_empty() {
        let original_body = &input_fn.body;
        input_fn.body = quote! {
            #body_imports
            #original_body
        };
    }

    // Build one #[target_feature(enable = "...")] attribute per resolved feature.
    let target_feature_attrs: Vec<Attribute> = features
        .iter()
        .map(|feature| parse_quote!(#[target_feature(enable = #feature)]))
        .collect();

    // Rename wildcard patterns (`_: Type`) to named params so the inner/sibling call works
    let mut wild_rename_counter = 0u32;
    for arg in &mut input_fn.sig.inputs {
        if let FnArg::Typed(pat_type) = arg
            && matches!(pat_type.pat.as_ref(), syn::Pat::Wild(_))
        {
            let ident = format_ident!("__archmage_wild_{}", wild_rename_counter);
            wild_rename_counter += 1;
            *pat_type.pat = syn::Pat::Ident(syn::PatIdent {
                attrs: vec![],
                by_ref: None,
                mutability: None,
                ident,
                subpat: None,
            });
        }
    }

    // Choose inline attribute based on args
    let inline_attr: Attribute = if args.inline_always {
        parse_quote!(#[inline(always)])
    } else {
        parse_quote!(#[inline])
    };

    // On wasm32, #[target_feature(enable = "simd128")] functions are safe (Rust 1.54+).
    // The wasm validation model guarantees unsupported instructions trap deterministically,
    // so there's no UB from feature mismatch. Skip the unsafe wrapper entirely.
    if target_arch == Some("wasm32") {
        return arcane_impl_wasm_safe(
            input_fn,
            &args,
            token_type_name,
            target_feature_attrs,
            inline_attr,
        );
    }

    // Non-wasm: nested (inner fn) or sibling (default) expansion.
    if args.nested {
        arcane_impl_nested(
            input_fn,
            &args,
            target_arch,
            token_type_name,
            target_feature_attrs,
            inline_attr,
        )
    } else {
        arcane_impl_sibling(
            input_fn,
            &args,
            target_arch,
            token_type_name,
            target_feature_attrs,
            inline_attr,
        )
    }
}
732
/// WASM-safe expansion: emits rite-style output (no unsafe wrapper).
///
/// On wasm32, `#[target_feature(enable = "simd128")]` is safe — the wasm validation
/// model traps deterministically on unsupported instructions, so there's no UB.
/// We emit the function directly with `#[target_feature]` + `#[inline]`, like `#[rite]`.
///
/// If `_self = Type` is set, we inject `let _self = self;` at the top of the body
/// (the function stays in impl scope, so `Self` resolves naturally — no replacement needed).
///
/// When `args.stub` is set, a second `#[cfg(not(target_arch = "wasm32"))]`
/// function with the same signature is emitted whose body is `unreachable!()`.
fn arcane_impl_wasm_safe(
    input_fn: LightFn,
    args: &ArcaneArgs,
    token_type_name: Option<String>,
    target_feature_attrs: Vec<Attribute>,
    inline_attr: Attribute,
) -> TokenStream {
    let vis = &input_fn.vis;
    let sig = &input_fn.sig;
    let fn_name = &sig.ident;
    let attrs = &input_fn.attrs;

    // Token name used in the stub's panic message; placeholder when unknown.
    let token_type_str = token_type_name.as_deref().unwrap_or("UnknownToken");

    // If _self = Type is set, inject `let _self = self;` at top of body so user code
    // referencing `_self` works. The function remains in impl scope, so `Self` resolves
    // naturally — no Self replacement needed (unlike nested mode's inner fn).
    let body = if args.self_type.is_some() {
        let original_body = &input_fn.body;
        quote! {
            let _self = self;
            #original_body
        }
    } else {
        input_fn.body.clone()
    };

    // Prepend target_feature + inline attrs, filtering user #[inline] to avoid duplicates
    let mut new_attrs = target_feature_attrs;
    new_attrs.push(inline_attr);
    for attr in filter_inline_attrs(attrs) {
        new_attrs.push(attr.clone());
    }

    let stub = if args.stub {
        // Build stub args for suppressing unused-variable warnings
        let stub_args: Vec<proc_macro2::TokenStream> = sig
            .inputs
            .iter()
            .filter_map(|arg| match arg {
                FnArg::Typed(pat_type) => {
                    if let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref() {
                        let ident = &pat_ident.ident;
                        Some(quote!(#ident))
                    } else {
                        None
                    }
                }
                FnArg::Receiver(_) => None,
            })
            .collect();

        // The stub is only reachable via forge_token_dangerously(); the panic
        // message says as much.
        quote! {
            #[cfg(not(target_arch = "wasm32"))]
            #vis #sig {
                let _ = (#(#stub_args),*);
                unreachable!(
                    "BUG: {}() was called but requires {} (target_arch = \"wasm32\"). \
                     {}::summon() returns None on this architecture, so this function \
                     is unreachable in safe code. If you used forge_token_dangerously(), \
                     that is the bug.",
                    stringify!(#fn_name),
                    #token_type_str,
                    #token_type_str,
                )
            }
        }
    } else {
        quote! {}
    };

    // Real function gated to wasm32, plus the optional stub for other arches.
    let expanded = quote! {
        #[cfg(target_arch = "wasm32")]
        #(#new_attrs)*
        #vis #sig {
            #body
        }

        #stub
    };

    expanded.into()
}
824
/// Sibling expansion (default): generates two functions at the same scope level.
///
/// ```ignore
/// // #[arcane] fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { body }
/// // expands to:
/// #[cfg(target_arch = "x86_64")]
/// #[doc(hidden)]
/// #[target_feature(enable = "avx2,fma,...")]
/// #[inline]
/// fn __arcane_process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { body }
///
/// #[cfg(target_arch = "x86_64")]
/// fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] {
///     unsafe { __arcane_process(token, data) }
/// }
/// ```
///
/// The sibling function is safe (Rust 2024 edition allows safe `#[target_feature]`
/// functions). Only the call from the wrapper needs `unsafe` because the wrapper
/// lacks matching target features. Compatible with `#![forbid(unsafe_code)]`.
///
/// Self/self work naturally since both functions live in the same impl scope.
///
/// `target_arch = None` (generic or trait-bound token) emits the same pair but
/// without `#[cfg]` guards; `stub` has no effect in that case (no stub branch
/// exists below for it).
fn arcane_impl_sibling(
    input_fn: LightFn,
    args: &ArcaneArgs,
    target_arch: Option<&str>,
    token_type_name: Option<String>,
    target_feature_attrs: Vec<Attribute>,
    inline_attr: Attribute,
) -> TokenStream {
    let vis = &input_fn.vis;
    let sig = &input_fn.sig;
    let fn_name = &sig.ident;
    let generics = &sig.generics;
    let where_clause = &generics.where_clause;
    let inputs = &sig.inputs;
    let output = &sig.output;
    let body = &input_fn.body;
    // Filter out user #[inline] attrs to avoid duplicates (will become a hard error).
    // The wrapper gets #[inline(always)] unconditionally — it's a trivial unsafe { sibling() }.
    let attrs = filter_inline_attrs(&input_fn.attrs);
    // Lint-control attrs (#[allow(...)], #[expect(...)], etc.) must also go on the sibling,
    // because the sibling has the same parameters and clippy lints it independently.
    let lint_attrs = filter_lint_attrs(&input_fn.attrs);

    // Fixed prefix keeps the hidden sibling visually distinct from user code.
    let sibling_name = format_ident!("__arcane_{}", fn_name);

    // Detect self receiver
    let has_self_receiver = inputs
        .first()
        .map(|arg| matches!(arg, FnArg::Receiver(_)))
        .unwrap_or(false);

    // Build sibling signature: same as original but with sibling name, #[doc(hidden)]
    // NOT unsafe — Rust 2024 edition allows safe #[target_feature] functions.
    // Only the call from non-matching context (the wrapper) needs unsafe.
    let sibling_sig_inputs = inputs;

    // Build turbofish for forwarding type/const generic params to sibling
    let turbofish = build_turbofish(generics);

    // Build the call from wrapper to sibling
    let sibling_call = if has_self_receiver {
        // Method: self.__arcane_fn::<T, N>(other_args...)
        // Only simple `ident: Ty` parameters can be forwarded by name; other
        // patterns (tuple/struct patterns, etc.) are skipped here.
        let other_args: Vec<proc_macro2::TokenStream> = inputs
            .iter()
            .skip(1) // skip self receiver
            .filter_map(|arg| {
                if let FnArg::Typed(pat_type) = arg
                    && let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref()
                {
                    let ident = &pat_ident.ident;
                    Some(quote!(#ident))
                } else {
                    None
                }
            })
            .collect();
        quote! { self.#sibling_name #turbofish(#(#other_args),*) }
    } else {
        // Free function: __arcane_fn::<T, N>(all_args...)
        let all_args: Vec<proc_macro2::TokenStream> = inputs
            .iter()
            .filter_map(|arg| {
                if let FnArg::Typed(pat_type) = arg
                    && let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref()
                {
                    let ident = &pat_ident.ident;
                    Some(quote!(#ident))
                } else {
                    None
                }
            })
            .collect();
        quote! { #sibling_name #turbofish(#(#all_args),*) }
    };

    // Build stub args for suppressing unused warnings
    let stub_args: Vec<proc_macro2::TokenStream> = inputs
        .iter()
        .filter_map(|arg| match arg {
            FnArg::Typed(pat_type) => {
                if let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref() {
                    let ident = &pat_ident.ident;
                    Some(quote!(#ident))
                } else {
                    None
                }
            }
            FnArg::Receiver(_) => None, // self doesn't need _ = suppression
        })
        .collect();

    let token_type_str = token_type_name.as_deref().unwrap_or("UnknownToken");

    let cfg_guard = gen_cfg_guard(target_arch, args.cfg_feature.as_deref());

    let expanded = if target_arch.is_some() {
        // Sibling function: #[doc(hidden)] #[target_feature] fn __arcane_fn(...)
        // Always private — only the wrapper is user-visible.
        // Safe declaration — Rust 2024 allows safe #[target_feature] functions.
        let sibling_fn = quote! {
            #cfg_guard
            #[doc(hidden)]
            #(#lint_attrs)*
            #(#target_feature_attrs)*
            #inline_attr
            fn #sibling_name #generics (#sibling_sig_inputs) #output #where_clause {
                #body
            }
        };

        // Wrapper function: fn original_name(...) { unsafe { sibling_call } }
        // The unsafe block is needed because the sibling has #[target_feature] and
        // the wrapper doesn't — calling across this boundary requires unsafe.
        let wrapper_fn = quote! {
            #cfg_guard
            #(#attrs)*
            #[inline(always)]
            #vis #sig {
                // SAFETY: The token parameter proves the required CPU features are available.
                // Calling a #[target_feature] function from a non-matching context requires
                // unsafe because the CPU may not support those instructions. The token's
                // existence proves summon() succeeded, so the features are available.
                unsafe { #sibling_call }
            }
        };

        // Optional stub for other architectures / missing feature
        let stub = if args.stub {
            let arch_str = target_arch.unwrap_or("unknown");
            // Negate the cfg guard used for the real implementation
            let not_cfg = match (target_arch, args.cfg_feature.as_deref()) {
                (Some(arch), Some(feat)) => {
                    quote! { #[cfg(not(all(target_arch = #arch, feature = #feat)))] }
                }
                (Some(arch), None) => quote! { #[cfg(not(target_arch = #arch))] },
                _ => quote! {},
            };
            quote! {
                #not_cfg
                #(#attrs)*
                #vis #sig {
                    let _ = (#(#stub_args),*);
                    unreachable!(
                        "BUG: {}() was called but requires {} (target_arch = \"{}\"). \
                         {}::summon() returns None on this architecture, so this function \
                         is unreachable in safe code. If you used forge_token_dangerously(), \
                         that is the bug.",
                        stringify!(#fn_name),
                        #token_type_str,
                        #arch_str,
                        #token_type_str,
                    )
                }
            }
        } else {
            quote! {}
        };

        quote! {
            #sibling_fn
            #wrapper_fn
            #stub
        }
    } else {
        // No specific arch (trait bounds or generic) - no cfg guards, no stub needed.
        // Still use sibling pattern for consistency. Sibling is always private.
        let sibling_fn = quote! {
            #[doc(hidden)]
            #(#lint_attrs)*
            #(#target_feature_attrs)*
            #inline_attr
            fn #sibling_name #generics (#sibling_sig_inputs) #output #where_clause {
                #body
            }
        };

        let wrapper_fn = quote! {
            #(#attrs)*
            #[inline(always)]
            #vis #sig {
                // SAFETY: The token proves the required CPU features are available.
                unsafe { #sibling_call }
            }
        };

        quote! {
            #sibling_fn
            #wrapper_fn
        }
    };

    expanded.into()
}
1040
/// Nested inner function expansion (opt-in via `nested` or `_self = Type`).
///
/// This is the original approach: generates a nested inner function inside the
/// original function. Required when `_self = Type` is used because Self must be
/// replaced in the nested function (where it's not in scope).
///
/// As with sibling mode, `target_arch = None` emits the expansion without
/// `#[cfg]` guards and without a stub; `stub` only applies when an arch is known.
fn arcane_impl_nested(
    input_fn: LightFn,
    args: &ArcaneArgs,
    target_arch: Option<&str>,
    token_type_name: Option<String>,
    target_feature_attrs: Vec<Attribute>,
    inline_attr: Attribute,
) -> TokenStream {
    let vis = &input_fn.vis;
    let sig = &input_fn.sig;
    let fn_name = &sig.ident;
    let generics = &sig.generics;
    let where_clause = &generics.where_clause;
    let inputs = &sig.inputs;
    let output = &sig.output;
    let body = &input_fn.body;
    // Filter out user #[inline] attrs to avoid duplicates (will become a hard error).
    let attrs = filter_inline_attrs(&input_fn.attrs);
    // Propagate lint attrs to inner function (same issue as sibling mode — #17)
    let lint_attrs = filter_lint_attrs(&input_fn.attrs);

    // Determine self receiver type if present.
    // Classifies the receiver as owned `self`, `&self`, or `&mut self`.
    let self_receiver_kind: Option<SelfReceiver> = inputs.first().and_then(|arg| match arg {
        FnArg::Receiver(receiver) => {
            if receiver.reference.is_none() {
                Some(SelfReceiver::Owned)
            } else if receiver.mutability.is_some() {
                Some(SelfReceiver::RefMut)
            } else {
                Some(SelfReceiver::Ref)
            }
        }
        _ => None,
    });

    // Build inner function parameters, transforming self if needed.
    // Also replace Self in non-self parameter types when _self = Type is set,
    // since the inner function is a nested fn where Self from the impl is not in scope.
    let inner_params: Vec<proc_macro2::TokenStream> = inputs
        .iter()
        .map(|arg| match arg {
            FnArg::Receiver(_) => {
                // Transform self receiver to _self parameter.
                // NOTE(review): these unwraps rely on the invariant that a receiver
                // only reaches nested mode together with `_self = Type` (and
                // self_receiver_kind is Some whenever a receiver is first) — confirm
                // that arcane_impl validates this upstream.
                let self_ty = args.self_type.as_ref().unwrap();
                match self_receiver_kind.as_ref().unwrap() {
                    SelfReceiver::Owned => quote!(_self: #self_ty),
                    SelfReceiver::Ref => quote!(_self: &#self_ty),
                    SelfReceiver::RefMut => quote!(_self: &mut #self_ty),
                }
            }
            FnArg::Typed(pat_type) => {
                if let Some(ref self_ty) = args.self_type {
                    replace_self_in_tokens(quote!(#pat_type), self_ty)
                } else {
                    quote!(#pat_type)
                }
            }
        })
        .collect();

    // Build inner function call arguments.
    // Only simple `ident: Ty` parameters are forwarded; other patterns are skipped.
    let inner_args: Vec<proc_macro2::TokenStream> = inputs
        .iter()
        .filter_map(|arg| match arg {
            FnArg::Typed(pat_type) => {
                if let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref() {
                    let ident = &pat_ident.ident;
                    Some(quote!(#ident))
                } else {
                    None
                }
            }
            FnArg::Receiver(_) => Some(quote!(self)), // Pass self to inner as _self
        })
        .collect();

    let inner_fn_name = format_ident!("__simd_inner_{}", fn_name);

    // Build turbofish for forwarding type/const generic params to inner function
    let turbofish = build_turbofish(generics);

    // Transform output, body, and where clause to replace Self with concrete type if needed.
    let (inner_output, inner_body, inner_where_clause): (
        proc_macro2::TokenStream,
        proc_macro2::TokenStream,
        proc_macro2::TokenStream,
    ) = if let Some(ref self_ty) = args.self_type {
        let transformed_output = replace_self_in_tokens(output.to_token_stream(), self_ty);
        let transformed_body = replace_self_in_tokens(body.clone(), self_ty);
        let transformed_where = where_clause
            .as_ref()
            .map(|wc| replace_self_in_tokens(wc.to_token_stream(), self_ty))
            .unwrap_or_default();
        (transformed_output, transformed_body, transformed_where)
    } else {
        // No `_self = Type`: pass output/body/where through untouched.
        (
            output.to_token_stream(),
            body.clone(),
            where_clause
                .as_ref()
                .map(|wc| wc.to_token_stream())
                .unwrap_or_default(),
        )
    };

    let token_type_str = token_type_name.as_deref().unwrap_or("UnknownToken");
    let cfg_guard = gen_cfg_guard(target_arch, args.cfg_feature.as_deref());

    let expanded = if target_arch.is_some() {
        let stub = if args.stub {
            let arch_str = target_arch.unwrap_or("unknown");
            // Negate the cfg guard used for the real implementation.
            let not_cfg = match (target_arch, args.cfg_feature.as_deref()) {
                (Some(arch), Some(feat)) => {
                    quote! { #[cfg(not(all(target_arch = #arch, feature = #feat)))] }
                }
                (Some(arch), None) => quote! { #[cfg(not(target_arch = #arch))] },
                _ => quote! {},
            };
            quote! {
                #not_cfg
                #(#attrs)*
                #vis #sig {
                    let _ = (#(#inner_args),*);
                    unreachable!(
                        "BUG: {}() was called but requires {} (target_arch = \"{}\"). \
                         {}::summon() returns None on this architecture, so this function \
                         is unreachable in safe code. If you used forge_token_dangerously(), \
                         that is the bug.",
                        stringify!(#fn_name),
                        #token_type_str,
                        #arch_str,
                        #token_type_str,
                    )
                }
            }
        } else {
            quote! {}
        };

        quote! {
            // Real implementation for the correct architecture
            #cfg_guard
            #(#attrs)*
            #[inline(always)]
            #vis #sig {
                #(#target_feature_attrs)*
                #inline_attr
                #(#lint_attrs)*
                fn #inner_fn_name #generics (#(#inner_params),*) #inner_output #inner_where_clause {
                    #inner_body
                }

                // SAFETY: The token parameter proves the required CPU features are available.
                unsafe { #inner_fn_name #turbofish(#(#inner_args),*) }
            }

            #stub
        }
    } else {
        // No specific arch (trait bounds or generic) - generate without cfg guards
        quote! {
            #(#attrs)*
            #[inline(always)]
            #vis #sig {
                #(#target_feature_attrs)*
                #inline_attr
                #(#lint_attrs)*
                fn #inner_fn_name #generics (#(#inner_params),*) #inner_output #inner_where_clause {
                    #inner_body
                }

                // SAFETY: The token proves the required CPU features are available.
                unsafe { #inner_fn_name #turbofish(#(#inner_args),*) }
            }
        }
    };

    expanded.into()
}
1225
1226/// Mark a function as an arcane SIMD function.
1227///
1228/// This macro generates a safe wrapper around a `#[target_feature]` function.
1229/// The token parameter type determines which CPU features are enabled.
1230///
1231/// # Expansion Modes
1232///
1233/// ## Sibling (default)
1234///
1235/// Generates two functions at the same scope: a safe `#[target_feature]` sibling
1236/// and a safe wrapper. `self`/`Self` work naturally since both functions share scope.
1237/// Compatible with `#![forbid(unsafe_code)]`.
1238///
1239/// ```ignore
1240/// #[arcane]
1241/// fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { /* body */ }
1242/// // Expands to (x86_64 only):
1243/// #[cfg(target_arch = "x86_64")]
1244/// #[doc(hidden)]
1245/// #[target_feature(enable = "avx2,fma,...")]
1246/// fn __arcane_process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { /* body */ }
1247///
1248/// #[cfg(target_arch = "x86_64")]
1249/// fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] {
1250///     unsafe { __arcane_process(token, data) }
1251/// }
1252/// ```
1253///
1254/// Methods work naturally:
1255///
1256/// ```ignore
1257/// impl MyType {
1258///     #[arcane]
1259///     fn compute(&self, token: X64V3Token) -> f32 {
1260///         self.data.iter().sum()  // self/Self just work!
1261///     }
1262/// }
1263/// ```
1264///
1265/// ## Nested (`nested` or `_self = Type`)
1266///
1267/// Generates a nested inner function inside the original. Required for trait impls
1268/// (where sibling functions would fail) and when `_self = Type` is used.
1269///
1270/// ```ignore
1271/// impl SimdOps for MyType {
1272///     #[arcane(_self = MyType)]
1273///     fn compute(&self, token: X64V3Token) -> Self {
1274///         // Use _self instead of self, Self replaced with MyType
1275///         _self.data.iter().sum()
1276///     }
1277/// }
1278/// ```
1279///
1280/// # Cross-Architecture Behavior
1281///
1282/// **Default (cfg-out):** On the wrong architecture, the function is not emitted
1283/// at all — no stub, no dead code. Code that references it must be cfg-gated.
1284///
1285/// **With `stub`:** Generates an `unreachable!()` stub on wrong architectures.
1286/// Use when cross-arch dispatch references the function without cfg guards.
1287///
1288/// ```ignore
1289/// #[arcane(stub)]  // generates stub on wrong arch
1290/// fn process_neon(token: NeonToken, data: &[f32]) -> f32 { ... }
1291/// ```
1292///
1293/// `incant!` is unaffected — it already cfg-gates dispatch calls by architecture.
1294///
1295/// # Token Parameter Forms
1296///
1297/// ```ignore
1298/// // Concrete token
1299/// #[arcane]
1300/// fn process(token: X64V3Token, data: &[f32; 8]) -> [f32; 8] { ... }
1301///
1302/// // impl Trait bound
1303/// #[arcane]
1304/// fn process(token: impl HasX64V2, data: &[f32; 8]) -> [f32; 8] { ... }
1305///
1306/// // Generic with inline or where-clause bounds
1307/// #[arcane]
1308/// fn process<T: HasX64V2>(token: T, data: &[f32; 8]) -> [f32; 8] { ... }
1309///
1310/// // Wildcard
1311/// #[arcane]
1312/// fn process(_: X64V3Token, data: &[f32; 8]) -> [f32; 8] { ... }
1313/// ```
1314///
1315/// # Options
1316///
1317/// | Option | Effect |
1318/// |--------|--------|
1319/// | `stub` | Generate `unreachable!()` stub on wrong architecture |
1320/// | `nested` | Use nested inner function instead of sibling |
1321/// | `_self = Type` | Implies `nested`, transforms self receiver, replaces Self |
1322/// | `inline_always` | Use `#[inline(always)]` (requires nightly) |
1323/// | `import_intrinsics` | Auto-import `archmage::intrinsics::{arch}::*` (includes safe memory ops) |
1324/// | `import_magetypes` | Auto-import `magetypes::simd::{ns}::*` and `magetypes::simd::backends::*` |
1325///
1326/// ## Auto-Imports
1327///
1328/// `import_intrinsics` and `import_magetypes` inject `use` statements into the
1329/// function body, eliminating boilerplate. The macro derives the architecture and
1330/// namespace from the token type:
1331///
1332/// ```ignore
1333/// // Without auto-imports — lots of boilerplate:
1334/// use std::arch::x86_64::*;
1335/// use magetypes::simd::v3::*;
1336///
1337/// #[arcane]
1338/// fn process(token: X64V3Token, data: &[f32; 8]) -> f32 {
1339///     let v = f32x8::load(token, data);
1340///     let zero = _mm256_setzero_ps();
1341///     // ...
1342/// }
1343///
1344/// // With auto-imports — clean:
1345/// #[arcane(import_intrinsics, import_magetypes)]
1346/// fn process(token: X64V3Token, data: &[f32; 8]) -> f32 {
1347///     let v = f32x8::load(token, data);
1348///     let zero = _mm256_setzero_ps();
1349///     // ...
1350/// }
1351/// ```
1352///
1353/// The namespace mapping is token-driven:
1354///
1355/// | Token | `import_intrinsics` | `import_magetypes` |
1356/// |-------|--------------------|--------------------|
1357/// | `X64V1..V3Token` | `archmage::intrinsics::x86_64::*` | `magetypes::simd::v3::*` |
1358/// | `X64V4Token` | `archmage::intrinsics::x86_64::*` | `magetypes::simd::v4::*` |
1359/// | `X64V4xToken` | `archmage::intrinsics::x86_64::*` | `magetypes::simd::v4x::*` |
1360/// | `NeonToken` / ARM | `archmage::intrinsics::aarch64::*` | `magetypes::simd::neon::*` |
1361/// | `Wasm128Token` | `archmage::intrinsics::wasm32::*` | `magetypes::simd::wasm128::*` |
1362///
1363/// Works with concrete tokens, `impl Trait` bounds, and generic parameters.
1364///
1365/// # Supported Tokens
1366///
1367/// - **x86_64**: `X64V2Token`, `X64V3Token`/`Desktop64`, `X64V4Token`/`Avx512Token`/`Server64`,
1368///   `X64V4xToken`, `Avx512Fp16Token`, `X64CryptoToken`, `X64V3CryptoToken`
1369/// - **ARM**: `NeonToken`/`Arm64`, `Arm64V2Token`, `Arm64V3Token`,
1370///   `NeonAesToken`, `NeonSha3Token`, `NeonCrcToken`
1371/// - **WASM**: `Wasm128Token`
1372///
1373/// # Supported Trait Bounds
1374///
1375/// `HasX64V2`, `HasX64V4`, `HasNeon`, `HasNeonAes`, `HasNeonSha3`, `HasArm64V2`, `HasArm64V3`
1376///
1377/// ```ignore
1378/// #![feature(target_feature_inline_always)]
1379///
1380/// #[arcane(inline_always)]
1381/// fn fast_kernel(token: Avx2Token, data: &mut [f32]) {
1382///     // Inner function will use #[inline(always)]
1383/// }
1384/// ```
1385#[proc_macro_attribute]
1386pub fn arcane(attr: TokenStream, item: TokenStream) -> TokenStream {
1387    let args = parse_macro_input!(attr as ArcaneArgs);
1388    let input_fn = parse_macro_input!(item as LightFn);
1389    arcane_impl(input_fn, "arcane", args)
1390}
1391
1392/// Legacy alias for [`arcane`].
1393///
1394/// **Deprecated:** Use `#[arcane]` instead. This alias exists only for migration.
1395#[proc_macro_attribute]
1396#[doc(hidden)]
1397pub fn simd_fn(attr: TokenStream, item: TokenStream) -> TokenStream {
1398    let args = parse_macro_input!(attr as ArcaneArgs);
1399    let input_fn = parse_macro_input!(item as LightFn);
1400    arcane_impl(input_fn, "simd_fn", args)
1401}
1402
1403/// Descriptive alias for [`arcane`].
1404///
1405/// Generates a safe wrapper around a `#[target_feature]` inner function.
1406/// The token type in your signature determines which CPU features are enabled.
1407/// Creates an LLVM optimization boundary — use [`token_target_features`]
1408/// (alias for [`rite`]) for inner helpers to avoid this.
1409///
1410/// Since Rust 1.85, value-based SIMD intrinsics are safe inside
1411/// `#[target_feature]` functions. This macro generates the `#[target_feature]`
1412/// wrapper so you never need to write `unsafe` for SIMD code.
1413///
1414/// See [`arcane`] for full documentation and examples.
1415#[proc_macro_attribute]
1416pub fn token_target_features_boundary(attr: TokenStream, item: TokenStream) -> TokenStream {
1417    let args = parse_macro_input!(attr as ArcaneArgs);
1418    let input_fn = parse_macro_input!(item as LightFn);
1419    arcane_impl(input_fn, "token_target_features_boundary", args)
1420}
1421
1422// ============================================================================
1423// Rite macro for inner SIMD functions (inlines into matching #[target_feature] callers)
1424// ============================================================================
1425
1426/// Annotate inner SIMD helpers called from `#[arcane]` functions.
1427///
1428/// Unlike `#[arcane]`, which creates an inner `#[target_feature]` function behind
1429/// a safe boundary, `#[rite]` adds `#[target_feature]` and `#[inline]` directly.
1430/// LLVM inlines it into any caller with matching features — no boundary crossing.
1431///
1432/// # Three Modes
1433///
1434/// **Token-based:** Reads the token type from the function signature.
1435/// ```ignore
1436/// #[rite]
1437/// fn helper(_: X64V3Token, v: __m256) -> __m256 { _mm256_add_ps(v, v) }
1438/// ```
1439///
1440/// **Tier-based:** Specify the tier name directly, no token parameter needed.
1441/// ```ignore
1442/// #[rite(v3)]
1443/// fn helper(v: __m256) -> __m256 { _mm256_add_ps(v, v) }
1444/// ```
1445///
1446/// Both produce identical code. The token form can be easier to remember if
1447/// you already have the token in scope.
1448///
1449/// **Multi-tier:** Specify multiple tiers to generate suffixed variants.
1450/// ```ignore
1451/// #[rite(v3, v4)]
1452/// fn process(data: &[f32; 4]) -> f32 { data.iter().sum() }
1453/// // Generates: process_v3() and process_v4()
1454/// ```
1455///
1456/// Each variant gets its own `#[target_feature]` and `#[cfg(target_arch)]`.
1457/// Since Rust 1.85, calling these from a matching `#[arcane]` or `#[rite]`
1458/// context is safe — no `unsafe` needed when the caller has matching or
1459/// superset features.
1460///
1461/// # Safety
1462///
1463/// `#[rite]` functions can only be safely called from contexts where the
1464/// required CPU features are enabled:
1465/// - From within `#[arcane]` functions with matching/superset tokens
1466/// - From within other `#[rite]` functions with matching/superset tokens
1467/// - From code compiled with `-Ctarget-cpu` that enables the features
1468///
1469/// Calling from other contexts requires `unsafe` and the caller must ensure
1470/// the CPU supports the required features.
1471///
1472/// # Cross-Architecture Behavior
1473///
1474/// Like `#[arcane]`, defaults to cfg-out (no function on wrong arch).
1475/// Use `#[rite(stub)]` to generate an unreachable stub instead.
1476///
1477/// # Options
1478///
1479/// | Option | Effect |
1480/// |--------|--------|
1481/// | tier name(s) | `v3`, `neon`, etc. One = single function; multiple = suffixed variants |
1482/// | `stub` | Generate `unreachable!()` stub on wrong architecture |
1483/// | `import_intrinsics` | Auto-import `archmage::intrinsics::{arch}::*` (includes safe memory ops) |
1484/// | `import_magetypes` | Auto-import `magetypes::simd::{ns}::*` and `magetypes::simd::backends::*` |
1485///
1486/// See `#[arcane]` docs for the full namespace mapping table.
1487///
1488/// # Comparison with #[arcane]
1489///
1490/// | Aspect | `#[arcane]` | `#[rite]` |
1491/// |--------|-------------|-----------|
1492/// | Creates wrapper | Yes | No |
1493/// | Entry point | Yes | No |
1494/// | Inlines into caller | No (barrier) | Yes |
1495/// | Safe to call anywhere | Yes (with token) | Only from feature-enabled context |
1496/// | Multi-tier variants | No | Yes (`#[rite(v3, v4, neon)]`) |
1497/// | `stub` param | Yes | Yes |
1498/// | `import_intrinsics` | Yes | Yes |
1499/// | `import_magetypes` | Yes | Yes |
1500#[proc_macro_attribute]
1501pub fn rite(attr: TokenStream, item: TokenStream) -> TokenStream {
1502    let args = parse_macro_input!(attr as RiteArgs);
1503    let input_fn = parse_macro_input!(item as LightFn);
1504    rite_impl(input_fn, args)
1505}
1506
1507/// Descriptive alias for [`rite`].
1508///
1509/// Applies `#[target_feature]` + `#[inline]` based on the token type in your
1510/// function signature. No wrapper, no optimization boundary. Use for functions
1511/// called from within `#[arcane]`/`#[token_target_features_boundary]` code.
1512///
1513/// Since Rust 1.85, calling a `#[target_feature]` function from another function
1514/// with matching features is safe — no `unsafe` needed.
1515///
1516/// See [`rite`] for full documentation and examples.
1517#[proc_macro_attribute]
1518pub fn token_target_features(attr: TokenStream, item: TokenStream) -> TokenStream {
1519    let args = parse_macro_input!(attr as RiteArgs);
1520    let input_fn = parse_macro_input!(item as LightFn);
1521    rite_impl(input_fn, args)
1522}
1523
/// Arguments for the `#[rite]` macro.
///
/// Built by the `Parse` impl below from the attribute's comma-separated token
/// list; every field starts at its off/empty state via `#[derive(Default)]`.
#[derive(Default)]
struct RiteArgs {
    /// Generate an `unreachable!()` stub on the wrong architecture.
    /// Default is false (cfg-out: no function emitted on wrong arch).
    stub: bool,
    /// Inject `use archmage::intrinsics::{arch}::*;` (includes safe memory ops).
    import_intrinsics: bool,
    /// Inject `use magetypes::simd::{ns}::*;`, `use magetypes::simd::generic::*;`,
    /// and `use magetypes::simd::backends::*;`.
    import_magetypes: bool,
    /// Tiers specified directly (e.g., `#[rite(v3)]` or `#[rite(v3, v4, neon)]`).
    /// Stored as canonical token names (e.g., "X64V3Token").
    /// Single tier: generates one function (no suffix, no token parameter needed).
    /// Multiple tiers: generates suffixed variants (e.g., `fn_v3`, `fn_v4`, `fn_neon`).
    tier_tokens: Vec<String>,
    /// Additional cargo feature gate (same as arcane's cfg_feature).
    /// `None` means no extra `feature = "..."` condition in the cfg guard.
    cfg_feature: Option<String>,
}
1543
1544impl Parse for RiteArgs {
1545    fn parse(input: ParseStream) -> syn::Result<Self> {
1546        let mut args = RiteArgs::default();
1547
1548        while !input.is_empty() {
1549            let ident: Ident = input.parse()?;
1550            match ident.to_string().as_str() {
1551                "stub" => args.stub = true,
1552                "import_intrinsics" => args.import_intrinsics = true,
1553                "import_magetypes" => args.import_magetypes = true,
1554                "cfg" => {
1555                    let content;
1556                    syn::parenthesized!(content in input);
1557                    let feat: Ident = content.parse()?;
1558                    args.cfg_feature = Some(feat.to_string());
1559                }
1560                other => {
1561                    if let Some(canonical) = tier_to_canonical_token(other) {
1562                        args.tier_tokens.push(String::from(canonical));
1563                    } else {
1564                        return Err(syn::Error::new(
1565                            ident.span(),
1566                            format!(
1567                                "unknown rite argument: `{}`. Supported: tier names \
1568                                 (v1, v2, v3, v4, neon, arm_v2, wasm128, ...), \
1569                                 `stub`, `import_intrinsics`, `import_magetypes`, `cfg(feature)`.",
1570                                other
1571                            ),
1572                        ));
1573                    }
1574                }
1575            }
1576            if input.peek(Token![,]) {
1577                let _: Token![,] = input.parse()?;
1578            }
1579        }
1580
1581        Ok(args)
1582    }
1583}
1584
1585/// Implementation for the `#[rite]` macro.
1586fn rite_impl(input_fn: LightFn, args: RiteArgs) -> TokenStream {
1587    // Multi-tier mode: generate suffixed variants for each tier
1588    if args.tier_tokens.len() > 1 {
1589        return rite_multi_tier_impl(input_fn, &args);
1590    }
1591
1592    // Single-tier or token-param mode
1593    rite_single_impl(input_fn, args)
1594}
1595
/// Generate a single `#[rite]` function (single tier or token-param mode).
///
/// Feature resolution order: an explicit tier argument (e.g. `#[rite(v3)]`)
/// wins; otherwise the token parameter in the signature is inspected. The
/// function gets `#[target_feature(enable = ...)]` + `#[inline]`, and — when
/// the target arch is known — a `#[cfg(target_arch = ...)]` guard plus an
/// optional `unreachable!()` stub for other architectures.
fn rite_single_impl(mut input_fn: LightFn, args: RiteArgs) -> TokenStream {
    // Resolve features: either from tier name or from token parameter
    let TokenParamInfo {
        features,
        target_arch,
        // Underscore-named because it is only read by the avx512 diagnostic
        // below, which is compiled out when the `avx512` feature is enabled.
        token_type_name: _token_type_name,
        magetypes_namespace,
        ..
    } = if let Some(tier_token) = args.tier_tokens.first() {
        // Tier specified directly (e.g., #[rite(v3)]) — no token param needed
        let features = token_to_features(tier_token)
            .expect("tier_to_canonical_token returned invalid token name")
            .to_vec();
        let target_arch = token_to_arch(tier_token);
        let magetypes_namespace = token_to_magetypes_namespace(tier_token);
        TokenParamInfo {
            // Placeholder ident: no real token parameter exists in this mode.
            ident: Ident::new("_", proc_macro2::Span::call_site()),
            features,
            target_arch,
            token_type_name: Some(tier_token.clone()),
            magetypes_namespace,
        }
    } else {
        match find_token_param(&input_fn.sig) {
            Some(result) => result,
            None => {
                // Check for specific misuse: featureless traits like SimdToken
                if let Some(trait_name) = diagnose_featureless_token(&input_fn.sig) {
                    let msg = format!(
                        "`{trait_name}` cannot be used as a token bound in #[rite] \
                         because it doesn't specify any CPU features.\n\
                         \n\
                         #[rite] needs concrete features to generate #[target_feature]. \
                         Use a concrete token, a feature trait, or a tier name:\n\
                         \n\
                         Concrete tokens: X64V3Token, Desktop64, NeonToken, Arm64V2Token, ...\n\
                         Feature traits:  impl HasX64V2, impl HasNeon, impl HasArm64V3, ...\n\
                         Tier names:      #[rite(v3)], #[rite(neon)], #[rite(v4)], ..."
                    );
                    return syn::Error::new_spanned(&input_fn.sig, msg)
                        .to_compile_error()
                        .into();
                }
                // Generic fallback diagnostic: no token param and no tier name.
                let msg = "rite requires a token parameter or a tier name. Supported forms:\n\
                     - Tier name: `#[rite(v3)]`, `#[rite(neon)]`\n\
                     - Multi-tier: `#[rite(v3, v4, neon)]` (generates suffixed variants)\n\
                     - Concrete: `token: X64V3Token`\n\
                     - impl Trait: `token: impl HasX64V2`\n\
                     - Generic: `fn foo<T: HasX64V2>(token: T, ...)`";
                return syn::Error::new_spanned(&input_fn.sig, msg)
                    .to_compile_error()
                    .into();
            }
        }
    };

    // Check: import_intrinsics with AVX-512 features requires the avx512 cargo feature.
    // Check resolved features (not token name) for uniform handling of concrete/trait/generic.
    #[cfg(not(feature = "avx512"))]
    if args.import_intrinsics && features.iter().any(|f| f.starts_with("avx512")) {
        let token_desc = _token_type_name.as_deref().unwrap_or("an AVX-512 token");
        let msg = format!(
            "Using {token_desc} with `import_intrinsics` requires the `avx512` feature.\n\
             \n\
             Add to your Cargo.toml:\n\
             \x20 archmage = {{ version = \"...\", features = [\"avx512\"] }}\n\
             \n\
             Without it, 512-bit safe memory ops (_mm512_loadu_ps etc.) are not available.\n\
             If you only need value intrinsics (no memory ops), remove `import_intrinsics`."
        );
        return syn::Error::new_spanned(&input_fn.sig, msg)
            .to_compile_error()
            .into();
    }

    // Build target_feature attributes, one per resolved feature string.
    let target_feature_attrs: Vec<Attribute> = features
        .iter()
        .map(|feature| parse_quote!(#[target_feature(enable = #feature)]))
        .collect();

    // Always use #[inline] - #[inline(always)] + #[target_feature] requires nightly
    let inline_attr: Attribute = parse_quote!(#[inline]);

    // Prepend attributes to the function, filtering user #[inline] to avoid duplicates
    let mut new_attrs = target_feature_attrs;
    new_attrs.push(inline_attr);
    for attr in filter_inline_attrs(&input_fn.attrs) {
        new_attrs.push(attr.clone());
    }
    input_fn.attrs = new_attrs;

    // Prepend import statements to body if requested
    let body_imports = generate_imports(
        target_arch,
        magetypes_namespace,
        args.import_intrinsics,
        args.import_magetypes,
    );
    if !body_imports.is_empty() {
        let original_body = &input_fn.body;
        input_fn.body = quote! {
            #body_imports
            #original_body
        };
    }

    // If we know the target arch, generate cfg-gated impl (+ optional stub)
    let cfg_guard = gen_cfg_guard(target_arch, args.cfg_feature.as_deref());
    if target_arch.is_some() {
        let vis = &input_fn.vis;
        let sig = &input_fn.sig;
        let attrs = &input_fn.attrs;
        let body = &input_fn.body;

        // Optional stub: emitted under the *negated* cfg so exactly one of
        // (real impl, stub) exists for any build configuration.
        let stub = if args.stub {
            let not_cfg = match (target_arch, args.cfg_feature.as_deref()) {
                (Some(arch), Some(feat)) => {
                    quote! { #[cfg(not(all(target_arch = #arch, feature = #feat)))] }
                }
                (Some(arch), None) => quote! { #[cfg(not(target_arch = #arch))] },
                _ => quote! {},
            };
            quote! {
                #not_cfg
                #vis #sig {
                    unreachable!("This function requires a specific architecture and feature set")
                }
            }
        } else {
            quote! {}
        };

        quote! {
            #cfg_guard
            #(#attrs)*
            #vis #sig {
                #body
            }

            #stub
        }
        .into()
    } else {
        // No specific arch (trait bounds) - just emit the annotated function
        quote!(#input_fn).into()
    }
}
1745
/// Generate multiple suffixed `#[rite]` variants for multi-tier mode.
///
/// `#[rite(v3, v4, neon)]` on `fn process(...)` generates:
/// - `fn process_v3(...)` with `#[target_feature(enable = "avx2,fma,...")]`
/// - `fn process_v4(...)` with `#[target_feature(enable = "avx512f,...")]`
/// - `fn process_neon(...)` with `#[target_feature(enable = "neon")]`
///
/// Each variant is cfg-gated to its architecture and gets `#[inline]`.
fn rite_multi_tier_impl(input_fn: LightFn, args: &RiteArgs) -> TokenStream {
    let fn_name = &input_fn.sig.ident;
    // Accumulates all generated variants (and stubs) as one token stream.
    let mut variants = proc_macro2::TokenStream::new();

    for tier_token in &args.tier_tokens {
        let features = match token_to_features(tier_token) {
            Some(f) => f,
            None => {
                return syn::Error::new_spanned(
                    &input_fn.sig,
                    format!("unknown token `{tier_token}` in multi-tier #[rite]"),
                )
                .to_compile_error()
                .into();
            }
        };
        let target_arch = token_to_arch(tier_token);
        let magetypes_namespace = token_to_magetypes_namespace(tier_token);

        // Check: import_intrinsics with AVX-512 features requires the avx512 cargo feature.
        // Mirrors the same diagnostic in the single-tier path.
        #[cfg(not(feature = "avx512"))]
        if args.import_intrinsics && features.iter().any(|f| f.starts_with("avx512")) {
            let msg = format!(
                "Using {tier_token} with `import_intrinsics` requires the `avx512` feature.\n\
                 \n\
                 Add to your Cargo.toml:\n\
                 \x20 archmage = {{ version = \"...\", features = [\"avx512\"] }}\n\
                 \n\
                 Without it, 512-bit safe memory ops (_mm512_loadu_ps etc.) are not available.\n\
                 If you only need value intrinsics (no memory ops), remove `import_intrinsics`."
            );
            return syn::Error::new_spanned(&input_fn.sig, msg)
                .to_compile_error()
                .into();
        }

        let suffix = canonical_token_to_tier_suffix(tier_token)
            .expect("canonical token must have a tier suffix");

        // Build suffixed function name
        let suffixed_ident = format_ident!("{}_{}", fn_name, suffix);

        // Clone and rename the function
        let mut variant_fn = input_fn.clone();
        variant_fn.sig.ident = suffixed_ident;

        // Build target_feature attributes
        let target_feature_attrs: Vec<Attribute> = features
            .iter()
            .map(|feature| parse_quote!(#[target_feature(enable = #feature)]))
            .collect();
        // #[inline] only: #[inline(always)] + #[target_feature] needs nightly.
        let inline_attr: Attribute = parse_quote!(#[inline]);

        // Prepend generated attrs, then user attrs minus any #[inline] variants.
        let mut new_attrs = target_feature_attrs;
        new_attrs.push(inline_attr);
        for attr in filter_inline_attrs(&variant_fn.attrs) {
            new_attrs.push(attr.clone());
        }
        variant_fn.attrs = new_attrs;

        // Prepend import statements if requested
        let body_imports = generate_imports(
            target_arch,
            magetypes_namespace,
            args.import_intrinsics,
            args.import_magetypes,
        );
        if !body_imports.is_empty() {
            let original_body = &variant_fn.body;
            variant_fn.body = quote! {
                #body_imports
                #original_body
            };
        }

        // Emit cfg-gated variant
        let variant_cfg = gen_cfg_guard(target_arch, args.cfg_feature.as_deref());
        if target_arch.is_some() {
            let vis = &variant_fn.vis;
            let sig = &variant_fn.sig;
            let attrs = &variant_fn.attrs;
            let body = &variant_fn.body;

            variants.extend(quote! {
                #variant_cfg
                #(#attrs)*
                #vis #sig {
                    #body
                }
            });

            // Per-variant stub under the negated cfg, like the single-tier path.
            if args.stub {
                let not_cfg = match (target_arch, args.cfg_feature.as_deref()) {
                    (Some(arch), Some(feat)) => {
                        quote! { #[cfg(not(all(target_arch = #arch, feature = #feat)))] }
                    }
                    (Some(arch), None) => quote! { #[cfg(not(target_arch = #arch))] },
                    _ => quote! {},
                };
                let arch_str = target_arch.unwrap_or("unknown");
                variants.extend(quote! {
                    #not_cfg
                    #vis #sig {
                        unreachable!(concat!(
                            "This function requires ",
                            #arch_str,
                            " architecture"
                        ))
                    }
                });
            }
        } else {
            // No specific arch — just emit the annotated function
            variants.extend(quote!(#variant_fn));
        }
    }

    variants.into()
}
1873
1874// =============================================================================
1875// magetypes! macro - generate platform variants from generic function
1876// =============================================================================
1877
1878/// Generate platform-specific variants from a function by replacing `Token`.
1879///
1880/// Use `Token` as a placeholder for the token type. The macro generates
1881/// suffixed variants with `Token` replaced by the concrete token type, and
1882/// each variant wrapped in the appropriate `#[cfg(target_arch = ...)]` guard.
1883///
1884/// # Default tiers
1885///
1886/// Without arguments, generates `_v3`, `_v4`, `_neon`, `_wasm128`, `_scalar`:
1887///
1888/// ```rust,ignore
1889/// #[magetypes]
1890/// fn process(token: Token, data: &[f32]) -> f32 {
1891///     inner_simd_work(token, data)
1892/// }
1893/// ```
1894///
1895/// # Explicit tiers
1896///
1897/// Specify which tiers to generate:
1898///
1899/// ```rust,ignore
1900/// #[magetypes(v1, v3, neon)]
1901/// fn process(token: Token, data: &[f32]) -> f32 {
1902///     inner_simd_work(token, data)
1903/// }
1904/// // Generates: process_v1, process_v3, process_neon, process_scalar
1905/// ```
1906///
1907/// `scalar` is always included implicitly.
1908///
1909/// Known tiers: `v1`, `v2`, `v3`, `v4`, `v4x`, `neon`, `neon_aes`,
1910/// `neon_sha3`, `neon_crc`, `wasm128`, `wasm128_relaxed`, `scalar`.
1911///
1912/// # What gets replaced
1913///
1914/// **Only `Token`** is replaced — with the concrete token type for each variant
1915/// (e.g., `archmage::X64V3Token`, `archmage::ScalarToken`). SIMD types like
1916/// `f32x8` and constants like `LANES` are **not** replaced by this macro.
1917///
1918/// # Usage with incant!
1919///
1920/// The generated variants work with `incant!` for dispatch:
1921///
1922/// ```rust,ignore
1923/// pub fn process_api(data: &[f32]) -> f32 {
1924///     incant!(process(data))
1925/// }
1926///
1927/// // Or with matching explicit tiers:
1928/// pub fn process_api(data: &[f32]) -> f32 {
1929///     incant!(process(data), [v1, v3, neon, scalar])
1930/// }
1931/// ```
1932#[proc_macro_attribute]
1933pub fn magetypes(attr: TokenStream, item: TokenStream) -> TokenStream {
1934    let input_fn = parse_macro_input!(item as LightFn);
1935
1936    // Parse optional tier list from attribute args: tier1, tier2(feature), ...
1937    let tier_names: Vec<String> = if attr.is_empty() {
1938        DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect()
1939    } else {
1940        match syn::parse::Parser::parse(parse_tier_names, attr) {
1941            Ok(names) => names,
1942            Err(e) => return e.to_compile_error().into(),
1943        }
1944    };
1945
1946    // default_optional: tiers with cfg_feature are optional by default
1947    let tiers = match resolve_tiers(
1948        &tier_names,
1949        input_fn.sig.ident.span(),
1950        true, // magetypes always uses default_optional for cfg_feature tiers
1951    ) {
1952        Ok(t) => t,
1953        Err(e) => return e.to_compile_error().into(),
1954    };
1955
1956    magetypes_impl(input_fn, &tiers)
1957}
1958
/// Expand `#[magetypes]`: clone the function once per resolved tier, renaming
/// it with the tier suffix and substituting the `Token` placeholder with the
/// tier's concrete token path. Substitution is done on the *stringified* token
/// stream, then re-parsed.
fn magetypes_impl(mut input_fn: LightFn, tiers: &[ResolvedTier]) -> TokenStream {
    // Strip user-provided #[arcane] / #[rite] to prevent double-wrapping
    // (magetypes auto-adds #[arcane] on non-scalar variants)
    input_fn
        .attrs
        .retain(|attr| !attr.path().is_ident("arcane") && !attr.path().is_ident("rite"));

    let fn_name = &input_fn.sig.ident;
    let fn_attrs = &input_fn.attrs;

    // Convert function to string for text substitution
    let fn_str = input_fn.to_token_stream().to_string();

    let mut variants = Vec::new();

    for tier in tiers {
        // Create suffixed function name
        let suffixed_name = format!("{}_{}", fn_name, tier.suffix);

        // Do text substitution
        let mut variant_str = fn_str.clone();

        // Replace function name
        // NOTE(review): replaces the *first* textual occurrence of the name in
        // the stringified function — if the name also appears earlier (e.g. in
        // a doc comment or attribute), that occurrence is renamed instead of
        // the `fn` ident. Verify against real-world attribute usage.
        variant_str = variant_str.replacen(&fn_name.to_string(), &suffixed_name, 1);

        // Replace Token type with concrete token
        // NOTE(review): plain substring replace — any identifier *containing*
        // "Token" (e.g. `MyTokenKind`) would be rewritten too. The rustdoc
        // above documents that only the bare `Token` placeholder is intended.
        variant_str = variant_str.replace("Token", tier.token_path);

        // Parse back to tokens
        let variant_tokens: proc_macro2::TokenStream = match variant_str.parse() {
            Ok(t) => t,
            Err(e) => {
                return syn::Error::new_spanned(
                    &input_fn,
                    format!(
                        "Failed to parse generated variant `{}`: {}",
                        suffixed_name, e
                    ),
                )
                .to_compile_error()
                .into();
            }
        };

        // Add cfg guard: arch + optional feature gate
        let allow_attr = if tier.allow_unexpected_cfg {
            quote! { #[allow(unexpected_cfgs)] }
        } else {
            quote! {}
        };
        let cfg_guard = match (tier.target_arch, &tier.feature_gate) {
            (Some(arch), Some(feat)) => quote! {
                #[cfg(target_arch = #arch)]
                #allow_attr
                #[cfg(feature = #feat)]
            },
            (Some(arch), None) => quote! { #[cfg(target_arch = #arch)] },
            (None, Some(feat)) => quote! {
                #allow_attr
                #[cfg(feature = #feat)]
            },
            (None, None) => quote! {},
        };

        variants.push(if tier.name != "scalar" {
            // Non-scalar variants get #[arcane] so target_feature is applied
            quote! {
                #cfg_guard
                #[archmage::arcane]
                #variant_tokens
            }
        } else {
            quote! {
                #cfg_guard
                #variant_tokens
            }
        });
    }

    // Remove attributes from the list that should not be duplicated
    // NOTE(review): each variant's token stream already contains the original
    // attributes (they were part of `fn_str`); re-emitting `filtered_attrs`
    // here places them before the first generated item as well. Presumably
    // intentional — confirm it doesn't double-apply attrs to the first variant.
    let filtered_attrs: Vec<_> = fn_attrs
        .iter()
        .filter(|a| !a.path().is_ident("magetypes"))
        .collect();

    let output = quote! {
        #(#filtered_attrs)*
        #(#variants)*
    };

    output.into()
}
2051
2052// =============================================================================
2053// incant! macro - dispatch to platform-specific variants
2054// =============================================================================
2055
2056// =============================================================================
2057// Tier descriptors for incant! and #[magetypes]
2058// =============================================================================
2059
/// Describes a dispatch tier for incant! and #[magetypes].
///
/// Entries live in the static `ALL_TIERS` table; `resolve_tiers` selects and
/// sorts them by `priority` (stable sort, so ties keep their input order).
struct TierDescriptor {
    /// Tier name as written in user code (e.g., "v3", "neon")
    name: &'static str,
    /// Function suffix (e.g., "v3", "neon", "scalar")
    suffix: &'static str,
    /// Token type path (e.g., "archmage::X64V3Token")
    token_path: &'static str,
    /// IntoConcreteToken method name (e.g., "as_x64v3")
    as_method: &'static str,
    /// Target architecture for cfg guard (None = no guard)
    target_arch: Option<&'static str>,
    /// Cargo feature required for this tier's functions to exist.
    /// When a tier is resolved as "optional" (e.g., `v4?` in incant! or in default
    /// tier lists), dispatch is wrapped in `#[allow(unexpected_cfgs)] #[cfg(feature = "...")]`.
    /// This checks the CALLING crate's features — matching the cfg on the function definitions.
    cfg_feature: Option<&'static str>,
    /// Dispatch priority (higher = tried first within same arch)
    priority: u32,
}
2080
2081/// All known tiers in dispatch-priority order (highest first within arch).
2082const ALL_TIERS: &[TierDescriptor] = &[
2083    // x86: highest to lowest
2084    TierDescriptor {
2085        name: "v4x",
2086        suffix: "v4x",
2087        token_path: "archmage::X64V4xToken",
2088        as_method: "as_x64v4x",
2089        target_arch: Some("x86_64"),
2090        cfg_feature: Some("avx512"),
2091        priority: 50,
2092    },
2093    TierDescriptor {
2094        name: "v4",
2095        suffix: "v4",
2096        token_path: "archmage::X64V4Token",
2097        as_method: "as_x64v4",
2098        target_arch: Some("x86_64"),
2099        cfg_feature: Some("avx512"),
2100        priority: 40,
2101    },
2102    TierDescriptor {
2103        name: "v3_crypto",
2104        suffix: "v3_crypto",
2105        token_path: "archmage::X64V3CryptoToken",
2106        as_method: "as_x64v3_crypto",
2107        target_arch: Some("x86_64"),
2108
2109        cfg_feature: None,
2110        priority: 35,
2111    },
2112    TierDescriptor {
2113        name: "v3",
2114        suffix: "v3",
2115        token_path: "archmage::X64V3Token",
2116        as_method: "as_x64v3",
2117        target_arch: Some("x86_64"),
2118
2119        cfg_feature: None,
2120        priority: 30,
2121    },
2122    TierDescriptor {
2123        name: "x64_crypto",
2124        suffix: "x64_crypto",
2125        token_path: "archmage::X64CryptoToken",
2126        as_method: "as_x64_crypto",
2127        target_arch: Some("x86_64"),
2128
2129        cfg_feature: None,
2130        priority: 25,
2131    },
2132    TierDescriptor {
2133        name: "v2",
2134        suffix: "v2",
2135        token_path: "archmage::X64V2Token",
2136        as_method: "as_x64v2",
2137        target_arch: Some("x86_64"),
2138
2139        cfg_feature: None,
2140        priority: 20,
2141    },
2142    TierDescriptor {
2143        name: "v1",
2144        suffix: "v1",
2145        token_path: "archmage::X64V1Token",
2146        as_method: "as_x64v1",
2147        target_arch: Some("x86_64"),
2148
2149        cfg_feature: None,
2150        priority: 10,
2151    },
2152    // ARM: highest to lowest
2153    TierDescriptor {
2154        name: "arm_v3",
2155        suffix: "arm_v3",
2156        token_path: "archmage::Arm64V3Token",
2157        as_method: "as_arm_v3",
2158        target_arch: Some("aarch64"),
2159
2160        cfg_feature: None,
2161        priority: 50,
2162    },
2163    TierDescriptor {
2164        name: "arm_v2",
2165        suffix: "arm_v2",
2166        token_path: "archmage::Arm64V2Token",
2167        as_method: "as_arm_v2",
2168        target_arch: Some("aarch64"),
2169
2170        cfg_feature: None,
2171        priority: 40,
2172    },
2173    TierDescriptor {
2174        name: "neon_aes",
2175        suffix: "neon_aes",
2176        token_path: "archmage::NeonAesToken",
2177        as_method: "as_neon_aes",
2178        target_arch: Some("aarch64"),
2179
2180        cfg_feature: None,
2181        priority: 30,
2182    },
2183    TierDescriptor {
2184        name: "neon_sha3",
2185        suffix: "neon_sha3",
2186        token_path: "archmage::NeonSha3Token",
2187        as_method: "as_neon_sha3",
2188        target_arch: Some("aarch64"),
2189
2190        cfg_feature: None,
2191        priority: 30,
2192    },
2193    TierDescriptor {
2194        name: "neon_crc",
2195        suffix: "neon_crc",
2196        token_path: "archmage::NeonCrcToken",
2197        as_method: "as_neon_crc",
2198        target_arch: Some("aarch64"),
2199
2200        cfg_feature: None,
2201        priority: 30,
2202    },
2203    TierDescriptor {
2204        name: "neon",
2205        suffix: "neon",
2206        token_path: "archmage::NeonToken",
2207        as_method: "as_neon",
2208        target_arch: Some("aarch64"),
2209
2210        cfg_feature: None,
2211        priority: 20,
2212    },
2213    // WASM
2214    TierDescriptor {
2215        name: "wasm128_relaxed",
2216        suffix: "wasm128_relaxed",
2217        token_path: "archmage::Wasm128RelaxedToken",
2218        as_method: "as_wasm128_relaxed",
2219        target_arch: Some("wasm32"),
2220
2221        cfg_feature: None,
2222        priority: 21,
2223    },
2224    TierDescriptor {
2225        name: "wasm128",
2226        suffix: "wasm128",
2227        token_path: "archmage::Wasm128Token",
2228        as_method: "as_wasm128",
2229        target_arch: Some("wasm32"),
2230
2231        cfg_feature: None,
2232        priority: 20,
2233    },
2234    // Scalar (always last)
2235    TierDescriptor {
2236        name: "scalar",
2237        suffix: "scalar",
2238        token_path: "archmage::ScalarToken",
2239        as_method: "as_scalar",
2240        target_arch: None,
2241
2242        cfg_feature: None,
2243        priority: 0,
2244    },
2245];
2246
/// Default tiers for all dispatch macros. v4 is always in the list — its
/// descriptor carries `cfg_feature: Some("avx512")`, so `resolve_tiers` with
/// default feature gates wraps it in `#[cfg(feature = "avx512")]` and it
/// compiles away when the feature is off.
/// (Previous comment referenced a `skip_avx512` parameter that doesn't exist.)
const DEFAULT_TIER_NAMES: &[&str] = &["v4", "v3", "neon", "wasm128", "scalar"];

/// Whether `incant!` requires `scalar` in explicit tier lists.
/// Currently false for backwards compatibility. Flip to true in v1.0.
const REQUIRE_EXPLICIT_SCALAR: bool = false;
2254
2255/// Parse a comma-separated list of tier names, each optionally followed by
2256/// `(feature)` for cfg-gating: `v4(avx512), v3, neon(simd), scalar`.
2257fn parse_tier_names(input: ParseStream) -> syn::Result<Vec<String>> {
2258    let mut names = Vec::new();
2259    while !input.is_empty() {
2260        let ident: Ident = input.parse()?;
2261        let name = if input.peek(syn::token::Paren) {
2262            let paren_content;
2263            syn::parenthesized!(paren_content in input);
2264            let feat: Ident = paren_content.parse()?;
2265            format!("{}({})", ident, feat)
2266        } else {
2267            ident.to_string()
2268        };
2269        names.push(name);
2270        if input.peek(Token![,]) {
2271            let _: Token![,] = input.parse()?;
2272        }
2273    }
2274    Ok(names)
2275}
2276
/// Look up a tier descriptor by its user-facing name.
///
/// Returns `None` for unknown names; callers such as `resolve_tiers` turn
/// that into a compile error listing the known tiers.
/// (Previous doc incorrectly said this returns an error.)
fn find_tier(name: &str) -> Option<&'static TierDescriptor> {
    ALL_TIERS.iter().find(|t| t.name == name)
}
2281
/// A resolved tier with its optional flag.
///
/// Derefs to [`TierDescriptor`] so callers can read `name`, `suffix`, etc.
/// directly.
#[derive(Clone)]
struct ResolvedTier {
    // The underlying static descriptor from ALL_TIERS.
    tier: &'static TierDescriptor,
    /// When Some, dispatch/generation is wrapped in `#[cfg(feature = "...")]`
    /// so it's eliminated when the calling crate doesn't define the feature.
    /// Set explicitly via `v4(avx512)` syntax or implicitly from `cfg_feature`
    /// on the TierDescriptor when using default tier lists.
    feature_gate: Option<String>,
    /// When true, `#[allow(unexpected_cfgs)]` is added before the `#[cfg]`.
    /// True for implicit gates (from defaults), false for explicit `tier(feat)`.
    allow_unexpected_cfg: bool,
}
2295
2296impl core::ops::Deref for ResolvedTier {
2297    type Target = TierDescriptor;
2298    fn deref(&self) -> &TierDescriptor {
2299        self.tier
2300    }
2301}
2302
2303/// Resolve tier names to descriptors, sorted by dispatch priority (highest first).
2304/// Always appends "scalar" if not already present.
2305///
2306/// Tier names can include a feature gate: `v4(avx512)` wraps dispatch/generation
2307/// in `#[allow(unexpected_cfgs)] #[cfg(feature = "avx512")]`. Any feature name
2308/// works: `neon(simd)`, `wasm128(wasm)`, etc. Without parentheses, the tier is
2309/// unconditional.
2310///
2311/// When `default_feature_gates` is true, tiers with `cfg_feature` in their
2312/// descriptor automatically get that as their feature gate, even without explicit
2313/// `(feature)` syntax. Used for default tier lists — v4/v4x auto-get `(avx512)`.
2314fn resolve_tiers(
2315    tier_names: &[String],
2316    error_span: proc_macro2::Span,
2317    default_feature_gates: bool,
2318) -> syn::Result<Vec<ResolvedTier>> {
2319    let mut tiers = Vec::new();
2320    for raw_name in tier_names {
2321        // Parse "tier(feature)" or plain "tier"
2322        let (name, explicit_gate) = if let Some(paren_pos) = raw_name.find('(') {
2323            let tier_name = &raw_name[..paren_pos];
2324            let feat = raw_name[paren_pos + 1..].trim_end_matches(')');
2325            (tier_name, Some(feat.to_string()))
2326        } else {
2327            (raw_name.as_str(), None)
2328        };
2329        match find_tier(name) {
2330            Some(tier) => {
2331                let is_explicit = explicit_gate.is_some();
2332                let feature_gate = explicit_gate.or_else(|| {
2333                    if default_feature_gates {
2334                        tier.cfg_feature.map(String::from)
2335                    } else {
2336                        None
2337                    }
2338                });
2339                tiers.push(ResolvedTier {
2340                    tier,
2341                    allow_unexpected_cfg: feature_gate.is_some() && !is_explicit,
2342                    feature_gate,
2343                });
2344            }
2345            None => {
2346                let known: Vec<&str> = ALL_TIERS.iter().map(|t| t.name).collect();
2347                return Err(syn::Error::new(
2348                    error_span,
2349                    format!("unknown tier `{}`. Known tiers: {}", name, known.join(", ")),
2350                ));
2351            }
2352        }
2353    }
2354
2355    // Always include scalar fallback
2356    if !tiers.iter().any(|rt| rt.tier.name == "scalar") {
2357        tiers.push(ResolvedTier {
2358            tier: find_tier("scalar").unwrap(),
2359            feature_gate: None,
2360            allow_unexpected_cfg: false,
2361        });
2362    }
2363
2364    // Sort by priority (highest first) for correct dispatch order
2365    tiers.sort_by(|a, b| b.tier.priority.cmp(&a.tier.priority));
2366
2367    Ok(tiers)
2368}
2369
2370// =============================================================================
2371// incant! macro - dispatch to platform-specific variants
2372// =============================================================================
2373
/// Parsed input for the `incant!` macro:
/// `func_path(args…) [with token_expr] [, [tier1, tier2(feature), …]]`
struct IncantInput {
    /// Function path to call (e.g. `func` or `module::func`); the tier suffix
    /// (`_v3`, `_scalar`, …) is appended to the last segment at expansion time.
    func_path: syn::Path,
    /// Arguments forwarded verbatim to the selected variant (after the token).
    args: Vec<syn::Expr>,
    /// Optional token expression for passthrough mode (`with token`);
    /// `None` selects entry-point mode, which summons tokens at runtime.
    with_token: Option<syn::Expr>,
    /// Optional explicit tier list plus the bracket span for error reporting
    /// (None = default tiers). Names may carry a `(feature)` gate suffix,
    /// stored as the string `"tier(feature)"`.
    tiers: Option<(Vec<String>, proc_macro2::Span)>,
}
2385
2386/// Create a suffixed version of a function path.
2387/// e.g. `module::func` + `"v3"` → `module::func_v3`
2388fn suffix_path(path: &syn::Path, suffix: &str) -> syn::Path {
2389    let mut suffixed = path.clone();
2390    if let Some(last) = suffixed.segments.last_mut() {
2391        last.ident = format_ident!("{}_{}", last.ident, suffix);
2392    }
2393    suffixed
2394}
2395
impl Parse for IncantInput {
    fn parse(input: ParseStream) -> syn::Result<Self> {
        // Parse: function_path(arg1, arg2, ...) [with token_expr] [, [tier1, tier2, ...]]
        let func_path: syn::Path = input.parse()?;

        // Parse parenthesized arguments
        let content;
        syn::parenthesized!(content in input);
        let args = content
            .parse_terminated(syn::Expr::parse, Token![,])?
            .into_iter()
            .collect();

        // Check for optional "with token".
        // A bare identifier after the argument list can only be the `with`
        // keyword; anything else is rejected with the identifier's span.
        let with_token = if input.peek(Ident) {
            let kw: Ident = input.parse()?;
            if kw != "with" {
                return Err(syn::Error::new_spanned(kw, "expected `with` keyword"));
            }
            Some(input.parse()?)
        } else {
            None
        };

        // Check for optional tier list: , [tier1, tier2(feature), ...]
        // tier(feature) wraps dispatch in #[cfg(feature = "feature")].
        // Example: [v4(avx512), v3, neon(simd), scalar]
        let tiers = if input.peek(Token![,]) {
            let _: Token![,] = input.parse()?;
            let bracket_content;
            let bracket = syn::bracketed!(bracket_content in input);
            let mut tier_names = Vec::new();
            while !bracket_content.is_empty() {
                let ident: Ident = bracket_content.parse()?;
                let name = if bracket_content.peek(syn::token::Paren) {
                    // Parse tier(feature) — feature gate syntax. Stored as the
                    // flat string "tier(feature)" and split apart later by
                    // resolve_tiers.
                    let paren_content;
                    syn::parenthesized!(paren_content in bracket_content);
                    let feat: Ident = paren_content.parse()?;
                    format!("{}({})", ident, feat)
                } else {
                    ident.to_string()
                };
                tier_names.push(name);
                // Separator comma is optional here (this also tolerates a
                // trailing comma before the closing bracket).
                if bracket_content.peek(Token![,]) {
                    let _: Token![,] = bracket_content.parse()?;
                }
            }
            // Keep the bracket span so tier-resolution errors point at the list.
            Some((tier_names, bracket.span.join()))
        } else {
            None
        };

        Ok(IncantInput {
            func_path,
            args,
            with_token,
            tiers,
        })
    }
}
2457
2458/// Dispatch to platform-specific SIMD variants.
2459///
2460/// # Entry Point Mode (no token yet)
2461///
2462/// Summons tokens and dispatches to the best available variant:
2463///
2464/// ```rust,ignore
2465/// pub fn public_api(data: &[f32]) -> f32 {
2466///     incant!(dot(data))
2467/// }
2468/// ```
2469///
2470/// Expands to runtime feature detection + dispatch to `dot_v3`, `dot_v4`,
2471/// `dot_neon`, `dot_wasm128`, or `dot_scalar`.
2472///
2473/// # Explicit Tiers
2474///
2475/// Specify which tiers to dispatch to:
2476///
2477/// ```rust,ignore
2478/// // Only dispatch to v1, v3, neon, and scalar
2479/// pub fn api(data: &[f32]) -> f32 {
2480///     incant!(process(data), [v1, v3, neon, scalar])
2481/// }
2482/// ```
2483///
2484/// Always include `scalar` in explicit tier lists — `incant!` always
2485/// emits a `fn_scalar()` call as the final fallback, and listing it
2486/// documents this dependency. Currently auto-appended if omitted;
2487/// will become a compile error in v1.0. Unknown tier names cause a
2488/// compile error. Tiers are automatically sorted into correct
2489/// dispatch order (highest priority first).
2490///
2491/// Known tiers: `v1`, `v2`, `v3`, `v4`, `v4x`, `neon`, `neon_aes`,
2492/// `neon_sha3`, `neon_crc`, `wasm128`, `wasm128_relaxed`, `scalar`.
2493///
2494/// # Passthrough Mode (already have token)
2495///
2496/// Uses compile-time dispatch via `IntoConcreteToken`:
2497///
2498/// ```rust,ignore
2499/// #[arcane]
2500/// fn outer(token: X64V3Token, data: &[f32]) -> f32 {
2501///     incant!(inner(data) with token)
2502/// }
2503/// ```
2504///
2505/// Also supports explicit tiers:
2506///
2507/// ```rust,ignore
2508/// fn inner<T: IntoConcreteToken>(token: T, data: &[f32]) -> f32 {
2509///     incant!(process(data) with token, [v3, neon, scalar])
2510/// }
2511/// ```
2512///
2513/// The compiler monomorphizes the dispatch, eliminating non-matching branches.
2514///
2515/// # Variant Naming
2516///
2517/// Functions must have suffixed variants matching the selected tiers:
2518/// - `_v1` for `X64V1Token`
2519/// - `_v2` for `X64V2Token`
2520/// - `_v3` for `X64V3Token`
2521/// - `_v4` for `X64V4Token` (requires `avx512` feature)
2522/// - `_v4x` for `X64V4xToken` (requires `avx512` feature)
2523/// - `_neon` for `NeonToken`
2524/// - `_neon_aes` for `NeonAesToken`
2525/// - `_neon_sha3` for `NeonSha3Token`
2526/// - `_neon_crc` for `NeonCrcToken`
2527/// - `_wasm128` for `Wasm128Token`
2528/// - `_scalar` for `ScalarToken`
2529#[proc_macro]
2530pub fn incant(input: TokenStream) -> TokenStream {
2531    let input = parse_macro_input!(input as IncantInput);
2532    incant_impl(input)
2533}
2534
2535/// Legacy alias for [`incant!`].
2536#[proc_macro]
2537pub fn simd_route(input: TokenStream) -> TokenStream {
2538    let input = parse_macro_input!(input as IncantInput);
2539    incant_impl(input)
2540}
2541
2542/// Descriptive alias for [`incant!`].
2543///
2544/// Dispatches to architecture-specific function variants at runtime.
2545/// Looks for suffixed functions (`_v3`, `_v4`, `_neon`, `_wasm128`, `_scalar`)
2546/// and calls the best one the CPU supports.
2547///
2548/// See [`incant!`] for full documentation and examples.
2549#[proc_macro]
2550pub fn dispatch_variant(input: TokenStream) -> TokenStream {
2551    let input = parse_macro_input!(input as IncantInput);
2552    incant_impl(input)
2553}
2554
2555fn incant_impl(input: IncantInput) -> TokenStream {
2556    let func_path = &input.func_path;
2557    let args = &input.args;
2558
2559    // Resolve tiers
2560    let tier_names: Vec<String> = match &input.tiers {
2561        Some((names, _)) => names.clone(),
2562        None => DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect(),
2563    };
2564    let last_segment_span = func_path
2565        .segments
2566        .last()
2567        .map(|s| s.ident.span())
2568        .unwrap_or_else(proc_macro2::Span::call_site);
2569    let error_span = input
2570        .tiers
2571        .as_ref()
2572        .map(|(_, span)| *span)
2573        .unwrap_or(last_segment_span);
2574
2575    // When the user specifies explicit tiers, require `scalar` in the list.
2576    // This forces acknowledgment that a scalar fallback path exists and must
2577    // be implemented. Default tiers (no bracket list) always include scalar.
2578    // TODO(v1.0): flip REQUIRE_EXPLICIT_SCALAR to true
2579    if REQUIRE_EXPLICIT_SCALAR
2580        && let Some((names, span)) = &input.tiers
2581        && !names.iter().any(|n| n == "scalar")
2582    {
2583        return syn::Error::new(
2584            *span,
2585            "explicit tier list must include `scalar`. \
2586             incant! always dispatches to fn_scalar() as the final fallback, \
2587             so `scalar` must appear in the tier list to acknowledge this. \
2588             Example: [v3, neon, scalar]",
2589        )
2590        .to_compile_error()
2591        .into();
2592    }
2593
2594    // Apply default feature gates: tiers with cfg_feature (v4→avx512) auto-get
2595    // the gate unless the user explicitly wrote tier(feature). This is true for
2596    // BOTH default and explicit tier lists — backwards compatible with published
2597    // crates using [v4, v3, neon] where _v4 is behind #[cfg(feature = "avx512")].
2598    // Users with unconditional _v4 functions use v4(!) or just don't cfg-gate them.
2599    let tiers = match resolve_tiers(&tier_names, error_span, true) {
2600        Ok(t) => t,
2601        Err(e) => return e.to_compile_error().into(),
2602    };
2603
2604    // Group tiers by architecture for cfg-guarded blocks
2605    // Within each arch, tiers are already sorted by priority (highest first)
2606    if let Some(token_expr) = &input.with_token {
2607        gen_incant_passthrough(func_path, args, token_expr, &tiers)
2608    } else {
2609        gen_incant_entry(func_path, args, &tiers)
2610    }
2611}
2612
2613/// Generate incant! passthrough mode (already have a token).
2614fn gen_incant_passthrough(
2615    func_path: &syn::Path,
2616    args: &[syn::Expr],
2617    token_expr: &syn::Expr,
2618    tiers: &[ResolvedTier],
2619) -> TokenStream {
2620    let mut dispatch_arms = Vec::new();
2621
2622    // Group non-scalar tiers by target_arch for cfg blocks
2623    let mut arch_groups: Vec<(Option<&str>, Vec<&ResolvedTier>)> = Vec::new();
2624    for rt in tiers {
2625        if rt.name == "scalar" {
2626            continue; // Handle scalar separately at the end
2627        }
2628        if let Some(group) = arch_groups.iter_mut().find(|(a, _)| *a == rt.target_arch) {
2629            group.1.push(rt);
2630        } else {
2631            arch_groups.push((rt.target_arch, vec![rt]));
2632        }
2633    }
2634
2635    for (target_arch, group_tiers) in &arch_groups {
2636        let mut tier_checks = Vec::new();
2637        for rt in group_tiers {
2638            let fn_suffixed = suffix_path(func_path, rt.suffix);
2639            let as_method = format_ident!("{}", rt.as_method);
2640
2641            let check = quote! {
2642                if let Some(__t) = __incant_token.#as_method() {
2643                    break '__incant #fn_suffixed(__t, #(#args),*);
2644                }
2645            };
2646
2647            if let Some(feat) = &rt.feature_gate {
2648                let allow_attr = if rt.allow_unexpected_cfg {
2649                    quote! { #[allow(unexpected_cfgs)] }
2650                } else {
2651                    quote! {}
2652                };
2653                tier_checks.push(quote! {
2654                    #allow_attr
2655                    #[cfg(feature = #feat)]
2656                    { #check }
2657                });
2658            } else {
2659                tier_checks.push(check);
2660            }
2661        }
2662
2663        let inner = quote! { #(#tier_checks)* };
2664
2665        if let Some(arch) = target_arch {
2666            dispatch_arms.push(quote! {
2667                #[cfg(target_arch = #arch)]
2668                { #inner }
2669            });
2670        } else {
2671            dispatch_arms.push(inner);
2672        }
2673    }
2674
2675    // Scalar fallback (always last)
2676    let fn_scalar = suffix_path(func_path, "scalar");
2677    let scalar_arm = if tiers.iter().any(|t| t.name == "scalar") {
2678        quote! {
2679            if let Some(__t) = __incant_token.as_scalar() {
2680                break '__incant #fn_scalar(__t, #(#args),*);
2681            }
2682            unreachable!("Token did not match any known variant")
2683        }
2684    } else {
2685        quote! { unreachable!("Token did not match any known variant") }
2686    };
2687
2688    let expanded = quote! {
2689        '__incant: {
2690            use archmage::IntoConcreteToken;
2691            let __incant_token = #token_expr;
2692            #(#dispatch_arms)*
2693            #scalar_arm
2694        }
2695    };
2696    expanded.into()
2697}
2698
2699/// Generate incant! entry point mode (summon tokens).
2700fn gen_incant_entry(
2701    func_path: &syn::Path,
2702    args: &[syn::Expr],
2703    tiers: &[ResolvedTier],
2704) -> TokenStream {
2705    let mut dispatch_arms = Vec::new();
2706
2707    // Group non-scalar tiers by target_arch for cfg blocks.
2708    let mut arch_groups: Vec<(Option<&str>, Vec<&ResolvedTier>)> = Vec::new();
2709    for rt in tiers {
2710        if rt.name == "scalar" {
2711            continue;
2712        }
2713        if let Some(group) = arch_groups.iter_mut().find(|(a, _)| *a == rt.target_arch) {
2714            group.1.push(rt);
2715        } else {
2716            arch_groups.push((rt.target_arch, vec![rt]));
2717        }
2718    }
2719
2720    for (target_arch, group_tiers) in &arch_groups {
2721        let mut tier_checks = Vec::new();
2722        for rt in group_tiers {
2723            let fn_suffixed = suffix_path(func_path, rt.suffix);
2724            let token_path: syn::Path = syn::parse_str(rt.token_path).unwrap();
2725
2726            let check = quote! {
2727                if let Some(__t) = #token_path::summon() {
2728                    break '__incant #fn_suffixed(__t, #(#args),*);
2729                }
2730            };
2731
2732            if let Some(feat) = &rt.feature_gate {
2733                let allow_attr = if rt.allow_unexpected_cfg {
2734                    quote! { #[allow(unexpected_cfgs)] }
2735                } else {
2736                    quote! {}
2737                };
2738                tier_checks.push(quote! {
2739                    #allow_attr
2740                    #[cfg(feature = #feat)]
2741                    { #check }
2742                });
2743            } else {
2744                tier_checks.push(check);
2745            }
2746        }
2747
2748        let inner = quote! { #(#tier_checks)* };
2749
2750        if let Some(arch) = target_arch {
2751            dispatch_arms.push(quote! {
2752                #[cfg(target_arch = #arch)]
2753                { #inner }
2754            });
2755        } else {
2756            dispatch_arms.push(inner);
2757        }
2758    }
2759
2760    // Scalar fallback
2761    let fn_scalar = suffix_path(func_path, "scalar");
2762
2763    let expanded = quote! {
2764        '__incant: {
2765            use archmage::SimdToken;
2766            #(#dispatch_arms)*
2767            #fn_scalar(archmage::ScalarToken, #(#args),*)
2768        }
2769    };
2770    expanded.into()
2771}
2772
2773// =============================================================================
2774// autoversion - combined variant generation + dispatch
2775// =============================================================================
2776
/// Arguments to the `#[autoversion]` macro.
///
/// Parsed from a comma-separated list of `_self = Type`, `cfg(feature)`,
/// and tier names (optionally `tier(feature)`), in any order.
struct AutoversionArgs {
    /// The concrete type to use for `self` receiver (inherent methods only).
    self_type: Option<Type>,
    /// Explicit tier names, each optionally `"name(feature)"`
    /// (None = default tiers).
    tiers: Option<Vec<String>>,
    /// When set, emit full autoversion under `#[cfg(feature = "...")]` and a
    /// plain scalar fallback under `#[cfg(not(feature = "..."))]`. Solves the
    /// hygiene issue with `macro_rules!` wrappers.
    cfg_feature: Option<String>,
}
2788
impl Parse for AutoversionArgs {
    fn parse(input: ParseStream) -> syn::Result<Self> {
        let mut self_type = None;
        let mut tier_names = Vec::new();
        let mut cfg_feature = None;

        // Comma-separated items; each begins with an identifier that selects
        // the item kind: `_self = Type`, `cfg(feature)`, or a tier name.
        while !input.is_empty() {
            let ident: Ident = input.parse()?;
            if ident == "_self" {
                let _: Token![=] = input.parse()?;
                self_type = Some(input.parse()?);
            } else if ident == "cfg" {
                let content;
                syn::parenthesized!(content in input);
                let feat: Ident = content.parse()?;
                cfg_feature = Some(feat.to_string());
            } else {
                // Treat as tier name, optionally with (feature) gate.
                // Stored as the flat string "tier(feature)", split later by
                // resolve_tiers.
                let name = if input.peek(syn::token::Paren) {
                    let paren_content;
                    syn::parenthesized!(paren_content in input);
                    let feat: Ident = paren_content.parse()?;
                    format!("{}({})", ident, feat)
                } else {
                    ident.to_string()
                };
                tier_names.push(name);
            }
            // Separator comma is optional (also tolerates a trailing comma).
            if input.peek(Token![,]) {
                let _: Token![,] = input.parse()?;
            }
        }

        Ok(AutoversionArgs {
            self_type,
            // An empty list means "use the default tiers".
            tiers: if tier_names.is_empty() {
                None
            } else {
                Some(tier_names)
            },
            cfg_feature,
        })
    }
}
2833
/// Information about the `SimdToken` parameter found in a function signature.
struct SimdTokenParamInfo {
    /// Index of the parameter in `sig.inputs` (a `self` receiver, if present,
    /// occupies index 0 of that list).
    index: usize,
    /// The parameter identifier; wildcard `_` parameters get the synthetic
    /// name `__autoversion_token`.
    #[allow(dead_code)]
    ident: Ident,
}
2842
2843/// Find the `SimdToken` parameter in a function signature.
2844///
2845/// Searches all typed parameters for one whose type path ends in `SimdToken`.
2846/// Returns the parameter index and identifier, or `None` if not found.
2847fn find_simd_token_param(sig: &Signature) -> Option<SimdTokenParamInfo> {
2848    for (i, arg) in sig.inputs.iter().enumerate() {
2849        if let FnArg::Typed(PatType { pat, ty, .. }) = arg
2850            && let Type::Path(type_path) = ty.as_ref()
2851            && let Some(seg) = type_path.path.segments.last()
2852            && seg.ident == "SimdToken"
2853        {
2854            let ident = match pat.as_ref() {
2855                syn::Pat::Ident(pi) => pi.ident.clone(),
2856                syn::Pat::Wild(w) => Ident::new("__autoversion_token", w.underscore_token.span),
2857                _ => continue,
2858            };
2859            return Some(SimdTokenParamInfo { index: i, ident });
2860        }
2861    }
2862    None
2863}
2864
/// Core implementation for `#[autoversion]`.
///
/// Generates suffixed SIMD variants (like `#[magetypes]`) and a runtime
/// dispatcher function (like `incant!`) from a single annotated function.
/// The dispatcher keeps the user's visibility and attributes; the generated
/// variants are private and marked `#[allow(dead_code)]`.
fn autoversion_impl(mut input_fn: LightFn, args: AutoversionArgs) -> TokenStream {
    // Check for self receiver
    let has_self = input_fn
        .sig
        .inputs
        .first()
        .is_some_and(|arg| matches!(arg, FnArg::Receiver(_)));

    // _self = Type is only needed for trait impls (nested mode in #[arcane]).
    // For inherent methods, self/Self work naturally in sibling mode.

    // Find SimdToken parameter — its absence is a hard error since it marks
    // where the concrete token type is substituted.
    let token_param = match find_simd_token_param(&input_fn.sig) {
        Some(p) => p,
        None => {
            return syn::Error::new_spanned(
                &input_fn.sig,
                "autoversion requires a `SimdToken` parameter.\n\
                 Example: fn process(token: SimdToken, data: &[f32]) -> f32 { ... }\n\n\
                 SimdToken is the dispatch placeholder — autoversion replaces it \
                 with concrete token types and generates a runtime dispatcher.",
            )
            .to_compile_error()
            .into();
        }
    };

    // Resolve tiers — autoversion always includes v4 in its defaults because it
    // generates scalar code compiled with #[target_feature], not import_intrinsics.
    let tier_names: Vec<String> = match &args.tiers {
        Some(names) => names.clone(),
        None => DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect(),
    };
    // autoversion never skips avx512 — it generates scalar code with #[target_feature].
    // (`false` = no automatic feature gates; only explicit tier(feature) gates apply,
    // unlike incant! which passes `true` here.)
    let tiers = match resolve_tiers(&tier_names, input_fn.sig.ident.span(), false) {
        Ok(t) => t,
        Err(e) => return e.to_compile_error().into(),
    };

    // Strip #[arcane] / #[rite] to prevent double-wrapping
    input_fn
        .attrs
        .retain(|attr| !attr.path().is_ident("arcane") && !attr.path().is_ident("rite"));

    let fn_name = &input_fn.sig.ident;
    let vis = input_fn.vis.clone();

    // Move attrs to dispatcher only; variants get no user attrs
    let fn_attrs: Vec<Attribute> = input_fn.attrs.drain(..).collect();

    // =========================================================================
    // Generate suffixed variants
    // =========================================================================
    //
    // AST manipulation only — we clone the parsed LightFn and swap the token
    // param's type annotation. No serialize/reparse round-trip. The body is
    // never touched unless _self = Type requires a `let _self = self;`
    // preamble on the scalar variant.

    let mut variants = Vec::new();

    for tier in &tiers {
        let mut variant_fn = input_fn.clone();

        // Variants are always private — only the dispatcher is public.
        variant_fn.vis = syn::Visibility::Inherited;

        // Rename: process → process_v3
        variant_fn.sig.ident = format_ident!("{}_{}", fn_name, tier.suffix);

        // Replace SimdToken param type with concrete token type.
        // token_path strings come from the built-in tier table, presumed
        // always parseable as a type — TODO confirm table entries stay valid.
        let concrete_type: Type = syn::parse_str(tier.token_path).unwrap();
        if let FnArg::Typed(pt) = &mut variant_fn.sig.inputs[token_param.index] {
            *pt.ty = concrete_type;
        }

        // Scalar with _self = Type: inject `let _self = self;` preamble so body's
        // _self references resolve (non-scalar variants get this from #[arcane(_self = Type)])
        if tier.name == "scalar" && has_self && args.self_type.is_some() {
            let original_body = variant_fn.body.clone();
            variant_fn.body = quote!(let _self = self; #original_body);
        }

        // cfg guard: arch + optional feature gate from tier(feature) syntax
        let allow_attr = if tier.allow_unexpected_cfg {
            quote! { #[allow(unexpected_cfgs)] }
        } else {
            quote! {}
        };
        let cfg_guard = match (tier.target_arch, &tier.feature_gate) {
            (Some(arch), Some(feat)) => quote! {
                #[cfg(target_arch = #arch)]
                #allow_attr
                #[cfg(feature = #feat)]
            },
            (Some(arch), None) => quote! { #[cfg(target_arch = #arch)] },
            (None, Some(feat)) => quote! {
                #allow_attr
                #[cfg(feature = #feat)]
            },
            (None, None) => quote! {},
        };

        // All variants are private implementation details of the dispatcher.
        // Suppress dead_code: if the dispatcher is unused, rustc warns on IT
        // (via quote_spanned! with the user's span). Warning on individual
        // variants would be confusing — the user didn't write _scalar or _v3.
        // Non-scalar variants get #[archmage::arcane] so their bodies compile
        // with the tier's target features; the scalar variant stays plain.
        if tier.name != "scalar" {
            let arcane_attr = if let Some(ref self_type) = args.self_type {
                quote! { #[archmage::arcane(_self = #self_type)] }
            } else {
                quote! { #[archmage::arcane] }
            };
            variants.push(quote! {
                #cfg_guard
                #[allow(dead_code)]
                #arcane_attr
                #variant_fn
            });
        } else {
            variants.push(quote! {
                #cfg_guard
                #[allow(dead_code)]
                #variant_fn
            });
        }
    }

    // =========================================================================
    // Generate dispatcher (adapted from gen_incant_entry)
    // =========================================================================

    // Build dispatcher inputs: original params minus SimdToken
    let mut dispatcher_inputs: Vec<FnArg> = input_fn.sig.inputs.iter().cloned().collect();
    dispatcher_inputs.remove(token_param.index);

    // Rename wildcard params so we can pass them as arguments
    let mut wild_counter = 0u32;
    for arg in &mut dispatcher_inputs {
        if let FnArg::Typed(pat_type) = arg
            && matches!(pat_type.pat.as_ref(), syn::Pat::Wild(_))
        {
            let ident = format_ident!("__autoversion_wild_{}", wild_counter);
            wild_counter += 1;
            *pat_type.pat = syn::Pat::Ident(syn::PatIdent {
                attrs: vec![],
                by_ref: None,
                mutability: None,
                ident,
                subpat: None,
            });
        }
    }

    // Collect argument idents for dispatch calls (exclude self receiver —
    // receivers are FnArg::Receiver and fall through the FnArg::Typed filter)
    let dispatch_args: Vec<Ident> = dispatcher_inputs
        .iter()
        .filter_map(|arg| {
            if let FnArg::Typed(PatType { pat, .. }) = arg
                && let syn::Pat::Ident(pi) = pat.as_ref()
            {
                return Some(pi.ident.clone());
            }
            None
        })
        .collect();

    // Build turbofish for forwarding type/const generics to variant calls
    let turbofish = build_turbofish(&input_fn.sig.generics);

    // Group non-scalar tiers by target_arch for cfg blocks
    let mut arch_groups: Vec<(Option<&str>, Vec<&ResolvedTier>)> = Vec::new();
    for tier in &tiers {
        if tier.name == "scalar" {
            continue;
        }
        if let Some(group) = arch_groups.iter_mut().find(|(a, _)| *a == tier.target_arch) {
            group.1.push(tier);
        } else {
            arch_groups.push((tier.target_arch, vec![tier]));
        }
    }

    let mut dispatch_arms = Vec::new();
    for (target_arch, group_tiers) in &arch_groups {
        let mut tier_checks = Vec::new();
        for rt in group_tiers {
            let suffixed = format_ident!("{}_{}", fn_name, rt.suffix);
            let token_path: syn::Path = syn::parse_str(rt.token_path).unwrap();

            // Method-call form for inherent methods, free-function form otherwise
            let call = if has_self {
                quote! { self.#suffixed #turbofish(__t, #(#dispatch_args),*) }
            } else {
                quote! { #suffixed #turbofish(__t, #(#dispatch_args),*) }
            };

            let check = quote! {
                if let Some(__t) = #token_path::summon() {
                    return #call;
                }
            };

            if let Some(feat) = &rt.feature_gate {
                let allow_attr = if rt.allow_unexpected_cfg {
                    quote! { #[allow(unexpected_cfgs)] }
                } else {
                    quote! {}
                };
                tier_checks.push(quote! {
                    #allow_attr
                    #[cfg(feature = #feat)]
                    { #check }
                });
            } else {
                tier_checks.push(check);
            }
        }

        let inner = quote! { #(#tier_checks)* };

        if let Some(arch) = target_arch {
            dispatch_arms.push(quote! {
                #[cfg(target_arch = #arch)]
                { #inner }
            });
        } else {
            dispatch_arms.push(inner);
        }
    }

    // Scalar fallback (always available, no summon needed)
    let scalar_name = format_ident!("{}_scalar", fn_name);
    let scalar_call = if has_self {
        quote! { self.#scalar_name #turbofish(archmage::ScalarToken, #(#dispatch_args),*) }
    } else {
        quote! { #scalar_name #turbofish(archmage::ScalarToken, #(#dispatch_args),*) }
    };

    // Build dispatcher function
    let dispatcher_inputs_punct: syn::punctuated::Punctuated<FnArg, Token![,]> =
        dispatcher_inputs.into_iter().collect();
    let output = &input_fn.sig.output;
    let generics = &input_fn.sig.generics;
    let where_clause = &generics.where_clause;

    // Use the user's span for the dispatcher so dead_code lint fires on the
    // function the user actually wrote, not on invisible generated variants.
    let user_span = fn_name.span();

    // autoversion uses `return` instead of `break '__dispatch` — no labeled block
    // needed. This avoids label hygiene issues when #[autoversion] is applied inside
    // macro_rules! (labels from proc macros can't be seen from macro_rules! contexts).
    let dispatcher = if let Some(ref feat) = args.cfg_feature {
        // cfg(feature): full dispatch when on, scalar-only when off
        quote_spanned! { user_span =>
            #[cfg(feature = #feat)]
            #(#fn_attrs)*
            #vis fn #fn_name #generics (#dispatcher_inputs_punct) #output #where_clause {
                use archmage::SimdToken;
                #(#dispatch_arms)*
                #scalar_call
            }

            #[cfg(not(feature = #feat))]
            #(#fn_attrs)*
            #vis fn #fn_name #generics (#dispatcher_inputs_punct) #output #where_clause {
                #scalar_call
            }
        }
    } else {
        quote_spanned! { user_span =>
            #(#fn_attrs)*
            #vis fn #fn_name #generics (#dispatcher_inputs_punct) #output #where_clause {
                use archmage::SimdToken;
                #(#dispatch_arms)*
                #scalar_call
            }
        }
    };

    // Dispatcher first, then all variants, as one item stream.
    let expanded = quote! {
        #dispatcher
        #(#variants)*
    };

    expanded.into()
}
3156
3157/// Let the compiler auto-vectorize scalar code for each architecture.
3158///
3159/// Write a plain scalar function with a `SimdToken` placeholder parameter.
3160/// `#[autoversion]` generates architecture-specific copies — each compiled
3161/// with different `#[target_feature]` flags via `#[arcane]` — plus a runtime
3162/// dispatcher that calls the best one the CPU supports.
3163///
3164/// You don't touch intrinsics, don't import SIMD types, don't think about
3165/// lane widths. The compiler's auto-vectorizer does the work; you give it
3166/// permission via `#[target_feature]`, which `#[autoversion]` handles.
3167///
3168/// # The simple win
3169///
3170/// ```rust,ignore
3171/// use archmage::SimdToken;
3172///
3173/// #[autoversion]
3174/// fn sum_of_squares(_token: SimdToken, data: &[f32]) -> f32 {
3175///     let mut sum = 0.0f32;
3176///     for &x in data {
3177///         sum += x * x;
3178///     }
3179///     sum
3180/// }
3181///
3182/// // Call directly — no token, no unsafe:
3183/// let result = sum_of_squares(&my_data);
3184/// ```
3185///
3186/// The `_token` parameter is never used in the body. It exists so the macro
3187/// knows where to substitute concrete token types. Each generated variant
3188/// gets `#[arcane]` → `#[target_feature(enable = "avx2,fma,...")]`, which
3189/// unlocks the compiler's auto-vectorizer for that feature set.
3190///
3191/// On x86-64 with the `_v3` variant (AVX2+FMA), that loop compiles to
3192/// `vfmadd231ps` — fused multiply-add on 8 floats per cycle. On aarch64
3193/// with NEON, you get `fmla`. The `_scalar` fallback compiles without any
3194/// SIMD target features, as a safety net for unknown hardware.
3195///
3196/// # Chunks + remainder
3197///
3198/// The classic data-processing pattern works naturally:
3199///
3200/// ```rust,ignore
3201/// #[autoversion]
3202/// fn normalize(_token: SimdToken, data: &mut [f32], scale: f32) {
3203///     // Compiler auto-vectorizes this — no manual SIMD needed.
3204///     // On v3, this becomes vdivps + vmulps on 8 floats at a time.
3205///     for x in data.iter_mut() {
3206///         *x = (*x - 128.0) * scale;
3207///     }
3208/// }
3209/// ```
3210///
3211/// If you want explicit control over chunk boundaries (e.g., for
3212/// accumulator patterns), that works too:
3213///
3214/// ```rust,ignore
3215/// #[autoversion]
3216/// fn dot_product(_token: SimdToken, a: &[f32], b: &[f32]) -> f32 {
3217///     let n = a.len().min(b.len());
3218///     let mut sum = 0.0f32;
3219///     for i in 0..n {
3220///         sum += a[i] * b[i];
3221///     }
3222///     sum
3223/// }
3224/// ```
3225///
3226/// The compiler decides the chunk size based on the target features of each
3227/// variant (8 floats for AVX2, 4 for NEON, 1 for scalar).
3228///
3229/// # What gets generated
3230///
3231/// With default tiers, `#[autoversion] fn process(_t: SimdToken, data: &[f32]) -> f32`
3232/// expands to:
3233///
3234/// - `process_v4(token: X64V4Token, ...)` — AVX-512 (behind `#[cfg(feature = "avx512")]`)
3235/// - `process_v3(token: X64V3Token, ...)` — AVX2+FMA
3236/// - `process_neon(token: NeonToken, ...)` — aarch64 NEON
3237/// - `process_wasm128(token: Wasm128Token, ...)` — WASM SIMD
3238/// - `process_scalar(token: ScalarToken, ...)` — no SIMD, always available
3239/// - `process(data: &[f32]) -> f32` — **dispatcher** (SimdToken param removed)
3240///
3241/// Each non-scalar variant is wrapped in `#[arcane]` (for `#[target_feature]`)
3242/// and `#[cfg(target_arch = ...)]`. The dispatcher does runtime CPU feature
3243/// detection via `Token::summon()` and calls the best match. When compiled
3244/// with `-C target-cpu=native`, the detection is elided by the compiler.
3245///
3246/// The suffixed variants are private sibling functions — only the dispatcher
3247/// is public. Within the same module, you can call them directly for testing
3248/// or benchmarking.
3249///
3250/// # SimdToken replacement
3251///
3252/// `#[autoversion]` replaces the `SimdToken` type annotation in the function
3253/// signature with the concrete token type for each variant (e.g.,
3254/// `archmage::X64V3Token`). Only the parameter's type changes — the function
3255/// body is never reparsed, which keeps compile times low.
3256///
3257/// The token variable (whatever you named it — `token`, `_token`, `_t`)
3258/// keeps working in the body because its type comes from the signature.
3259/// So `f32x8::from_array(token, ...)` works — `token` is now an `X64V3Token`
3260/// which satisfies the same trait bounds as `SimdToken`.
3261///
3262/// `#[magetypes]` takes a different approach: it replaces the text `Token`
3263/// everywhere in the function — signature and body — via string substitution.
3264/// Use `#[magetypes]` when you need body-level type substitution (e.g.,
3265/// `Token`-dependent constants or type aliases that differ per variant).
3266/// Use `#[autoversion]` when you want compiler auto-vectorization of scalar
3267/// code with zero boilerplate.
3268///
3269/// # Benchmarking
3270///
3271/// Measure the speedup with a side-by-side comparison. The generated
3272/// `_scalar` variant serves as the baseline; the dispatcher picks the
3273/// best available:
3274///
3275/// ```rust,ignore
3276/// use criterion::{Criterion, black_box, criterion_group, criterion_main};
3277/// use archmage::SimdToken;
3278///
3279/// #[autoversion]
3280/// fn sum_squares(_token: SimdToken, data: &[f32]) -> f32 {
3281///     data.iter().map(|&x| x * x).fold(0.0f32, |a, b| a + b)
3282/// }
3283///
3284/// fn bench(c: &mut Criterion) {
3285///     let data: Vec<f32> = (0..4096).map(|i| i as f32 * 0.01).collect();
3286///     let mut group = c.benchmark_group("sum_squares");
3287///
3288///     // Dispatched — picks best available at runtime
3289///     group.bench_function("dispatched", |b| {
3290///         b.iter(|| sum_squares(black_box(&data)))
3291///     });
3292///
3293///     // Scalar baseline — no target_feature, no auto-vectorization
3294///     group.bench_function("scalar", |b| {
3295///         b.iter(|| sum_squares_scalar(archmage::ScalarToken, black_box(&data)))
3296///     });
3297///
3298///     // Specific tier (useful for isolating which tier wins)
3299///     #[cfg(target_arch = "x86_64")]
3300///     if let Some(t) = archmage::X64V3Token::summon() {
3301///         group.bench_function("v3_avx2_fma", |b| {
3302///             b.iter(|| sum_squares_v3(t, black_box(&data)));
3303///         });
3304///     }
3305///
3306///     group.finish();
3307/// }
3308///
3309/// criterion_group!(benches, bench);
3310/// criterion_main!(benches);
3311/// ```
3312///
3313/// For a tight numeric loop on x86-64, the `_v3` variant (AVX2+FMA)
3314/// typically runs 4-8x faster than `_scalar` because `#[target_feature]`
3315/// unlocks auto-vectorization that the baseline build can't use.
3316///
3317/// # Explicit tiers
3318///
3319/// ```rust,ignore
3320/// #[autoversion(v3, v4, v4x, neon, arm_v2, wasm128)]
3321/// fn process(_token: SimdToken, data: &[f32]) -> f32 {
3322///     // ...
3323/// }
3324/// ```
3325///
3326/// `scalar` is always included implicitly.
3327///
3328/// Default tiers (when no list given): `v4`, `v3`, `neon`, `wasm128`, `scalar`.
3329///
3330/// Known tiers: `v1`, `v2`, `v3`, `v3_crypto`, `v4`, `v4x`, `neon`,
3331/// `neon_aes`, `neon_sha3`, `neon_crc`, `arm_v2`, `arm_v3`, `wasm128`,
3332/// `wasm128_relaxed`, `x64_crypto`, `scalar`.
3333///
3334/// # Methods with self receivers
3335///
3336/// For inherent methods, `self` works naturally — no `_self` needed:
3337///
3338/// ```rust,ignore
3339/// impl ImageBuffer {
3340///     #[autoversion]
3341///     fn normalize(&mut self, token: SimdToken, gamma: f32) {
3342///         for pixel in &mut self.data {
3343///             *pixel = (*pixel / 255.0).powf(gamma);
3344///         }
3345///     }
3346/// }
3347///
3348/// // Call normally — no token:
3349/// buffer.normalize(2.2);
3350/// ```
3351///
3352/// All receiver types work: `self`, `&self`, `&mut self`. Non-scalar variants
3353/// get `#[arcane]` (sibling mode), where `self`/`Self` resolve naturally.
3354///
3355/// # Trait methods (requires `_self = Type`)
3356///
3357/// Trait methods can't use `#[autoversion]` directly because proc macro
3358/// attributes on trait impl items can't expand to multiple sibling functions.
3359/// Use the delegation pattern with `_self = Type`:
3360///
3361/// ```rust,ignore
3362/// trait Processor {
3363///     fn process(&self, data: &[f32]) -> f32;
3364/// }
3365///
3366/// impl Processor for MyType {
3367///     fn process(&self, data: &[f32]) -> f32 {
3368///         self.process_impl(data) // delegate to autoversioned method
3369///     }
3370/// }
3371///
3372/// impl MyType {
3373///     #[autoversion(_self = MyType)]
3374///     fn process_impl(&self, token: SimdToken, data: &[f32]) -> f32 {
3375///         _self.weights.iter().zip(data).map(|(w, d)| w * d).sum()
3376///     }
3377/// }
3378/// ```
3379///
3380/// `_self = Type` uses nested mode in `#[arcane]`, which is required for
3381/// trait impls. Use `_self` (not `self`) in the body when using this form.
3382///
3383/// # Comparison with `#[magetypes]` + `incant!`
3384///
3385/// | | `#[autoversion]` | `#[magetypes]` + `incant!` |
3386/// |---|---|---|
3387/// | Placeholder | `SimdToken` | `Token` |
3388/// | Generates variants | Yes | Yes (magetypes) |
3389/// | Generates dispatcher | Yes | No (you write `incant!`) |
3390/// | Best for | Scalar auto-vectorization | Explicit SIMD with typed vectors |
3391/// | Lines of code | 1 attribute | 2+ (magetypes + incant + arcane) |
3392///
3393/// Use `#[autoversion]` for scalar loops you want auto-vectorized. Use
3394/// `#[magetypes]` + `incant!` when you need `f32x8`, `u8x32`, and
/// hand-tuned SIMD code per architecture.
3396#[proc_macro_attribute]
3397pub fn autoversion(attr: TokenStream, item: TokenStream) -> TokenStream {
3398    let args = parse_macro_input!(attr as AutoversionArgs);
3399    let input_fn = parse_macro_input!(item as LightFn);
3400    autoversion_impl(input_fn, args)
3401}
3402
3403// =============================================================================
3404// Unit tests for token/trait recognition maps
3405// =============================================================================
3406
3407#[cfg(test)]
3408mod tests {
3409    use super::*;
3410
3411    use super::generated::{ALL_CONCRETE_TOKENS, ALL_TRAIT_NAMES};
3412    use syn::{ItemFn, ReturnType};
3413
3414    #[test]
3415    fn every_concrete_token_is_in_token_to_features() {
3416        for &name in ALL_CONCRETE_TOKENS {
3417            assert!(
3418                token_to_features(name).is_some(),
3419                "Token `{}` exists in runtime crate but is NOT recognized by \
3420                 token_to_features() in the proc macro. Add it!",
3421                name
3422            );
3423        }
3424    }
3425
3426    #[test]
3427    fn every_trait_is_in_trait_to_features() {
3428        for &name in ALL_TRAIT_NAMES {
3429            assert!(
3430                trait_to_features(name).is_some(),
3431                "Trait `{}` exists in runtime crate but is NOT recognized by \
3432                 trait_to_features() in the proc macro. Add it!",
3433                name
3434            );
3435        }
3436    }
3437
3438    #[test]
3439    fn token_aliases_map_to_same_features() {
3440        // Desktop64 = X64V3Token
3441        assert_eq!(
3442            token_to_features("Desktop64"),
3443            token_to_features("X64V3Token"),
3444            "Desktop64 and X64V3Token should map to identical features"
3445        );
3446
3447        // Server64 = X64V4Token = Avx512Token
3448        assert_eq!(
3449            token_to_features("Server64"),
3450            token_to_features("X64V4Token"),
3451            "Server64 and X64V4Token should map to identical features"
3452        );
3453        assert_eq!(
3454            token_to_features("X64V4Token"),
3455            token_to_features("Avx512Token"),
3456            "X64V4Token and Avx512Token should map to identical features"
3457        );
3458
3459        // Arm64 = NeonToken
3460        assert_eq!(
3461            token_to_features("Arm64"),
3462            token_to_features("NeonToken"),
3463            "Arm64 and NeonToken should map to identical features"
3464        );
3465    }
3466
3467    #[test]
3468    fn trait_to_features_includes_tokens_as_bounds() {
3469        // Tier tokens should also work as trait bounds
3470        // (for `impl X64V3Token` patterns, even though Rust won't allow it,
3471        // the macro processes AST before type checking)
3472        let tier_tokens = [
3473            "X64V2Token",
3474            "X64CryptoToken",
3475            "X64V3Token",
3476            "Desktop64",
3477            "Avx2FmaToken",
3478            "X64V4Token",
3479            "Avx512Token",
3480            "Server64",
3481            "X64V4xToken",
3482            "Avx512Fp16Token",
3483            "NeonToken",
3484            "Arm64",
3485            "NeonAesToken",
3486            "NeonSha3Token",
3487            "NeonCrcToken",
3488            "Arm64V2Token",
3489            "Arm64V3Token",
3490        ];
3491
3492        for &name in &tier_tokens {
3493            assert!(
3494                trait_to_features(name).is_some(),
3495                "Tier token `{}` should also be recognized in trait_to_features() \
3496                 for use as a generic bound. Add it!",
3497                name
3498            );
3499        }
3500    }
3501
3502    #[test]
3503    fn trait_features_are_cumulative() {
3504        // HasX64V4 should include all HasX64V2 features plus more
3505        let v2_features = trait_to_features("HasX64V2").unwrap();
3506        let v4_features = trait_to_features("HasX64V4").unwrap();
3507
3508        for &f in v2_features {
3509            assert!(
3510                v4_features.contains(&f),
3511                "HasX64V4 should include v2 feature `{}` but doesn't",
3512                f
3513            );
3514        }
3515
3516        // v4 should have more features than v2
3517        assert!(
3518            v4_features.len() > v2_features.len(),
3519            "HasX64V4 should have more features than HasX64V2"
3520        );
3521    }
3522
3523    #[test]
3524    fn x64v3_trait_features_include_v2() {
3525        // X64V3Token as trait bound should include v2 features
3526        let v2 = trait_to_features("HasX64V2").unwrap();
3527        let v3 = trait_to_features("X64V3Token").unwrap();
3528
3529        for &f in v2 {
3530            assert!(
3531                v3.contains(&f),
3532                "X64V3Token trait features should include v2 feature `{}` but don't",
3533                f
3534            );
3535        }
3536    }
3537
3538    #[test]
3539    fn has_neon_aes_includes_neon() {
3540        let neon = trait_to_features("HasNeon").unwrap();
3541        let neon_aes = trait_to_features("HasNeonAes").unwrap();
3542
3543        for &f in neon {
3544            assert!(
3545                neon_aes.contains(&f),
3546                "HasNeonAes should include NEON feature `{}`",
3547                f
3548            );
3549        }
3550    }
3551
3552    #[test]
3553    fn no_removed_traits_are_recognized() {
3554        // These traits were removed in 0.3.0 and should NOT be recognized
3555        let removed = [
3556            "HasSse",
3557            "HasSse2",
3558            "HasSse41",
3559            "HasSse42",
3560            "HasAvx",
3561            "HasAvx2",
3562            "HasFma",
3563            "HasAvx512f",
3564            "HasAvx512bw",
3565            "HasAvx512vl",
3566            "HasAvx512vbmi2",
3567            "HasSve",
3568            "HasSve2",
3569        ];
3570
3571        for &name in &removed {
3572            assert!(
3573                trait_to_features(name).is_none(),
3574                "Removed trait `{}` should NOT be in trait_to_features(). \
3575                 It was removed in 0.3.0 — users should migrate to tier traits.",
3576                name
3577            );
3578        }
3579    }
3580
3581    #[test]
3582    fn no_nonexistent_tokens_are_recognized() {
3583        // These tokens don't exist and should NOT be recognized
3584        let fake = [
3585            "SveToken",
3586            "Sve2Token",
3587            "Avx512VnniToken",
3588            "X64V4ModernToken",
3589            "NeonFp16Token",
3590        ];
3591
3592        for &name in &fake {
3593            assert!(
3594                token_to_features(name).is_none(),
3595                "Non-existent token `{}` should NOT be in token_to_features()",
3596                name
3597            );
3598        }
3599    }
3600
3601    #[test]
3602    fn featureless_traits_are_not_in_registries() {
3603        // SimdToken and IntoConcreteToken should NOT be in any feature registry
3604        // because they don't map to CPU features
3605        for &name in FEATURELESS_TRAIT_NAMES {
3606            assert!(
3607                token_to_features(name).is_none(),
3608                "`{}` should NOT be in token_to_features() — it has no CPU features",
3609                name
3610            );
3611            assert!(
3612                trait_to_features(name).is_none(),
3613                "`{}` should NOT be in trait_to_features() — it has no CPU features",
3614                name
3615            );
3616        }
3617    }
3618
3619    #[test]
3620    fn find_featureless_trait_detects_simdtoken() {
3621        let names = vec!["SimdToken".to_string()];
3622        assert_eq!(find_featureless_trait(&names), Some("SimdToken"));
3623
3624        let names = vec!["IntoConcreteToken".to_string()];
3625        assert_eq!(find_featureless_trait(&names), Some("IntoConcreteToken"));
3626
3627        // Feature-bearing traits should NOT be detected
3628        let names = vec!["HasX64V2".to_string()];
3629        assert_eq!(find_featureless_trait(&names), None);
3630
3631        let names = vec!["HasNeon".to_string()];
3632        assert_eq!(find_featureless_trait(&names), None);
3633
3634        // Mixed: if SimdToken is among real traits, still detected
3635        let names = vec!["SimdToken".to_string(), "HasX64V2".to_string()];
3636        assert_eq!(find_featureless_trait(&names), Some("SimdToken"));
3637    }
3638
3639    #[test]
3640    fn arm64_v2_v3_traits_are_cumulative() {
3641        let v2_features = trait_to_features("HasArm64V2").unwrap();
3642        let v3_features = trait_to_features("HasArm64V3").unwrap();
3643
3644        for &f in v2_features {
3645            assert!(
3646                v3_features.contains(&f),
3647                "HasArm64V3 should include v2 feature `{}` but doesn't",
3648                f
3649            );
3650        }
3651
3652        assert!(
3653            v3_features.len() > v2_features.len(),
3654            "HasArm64V3 should have more features than HasArm64V2"
3655        );
3656    }
3657
3658    // =========================================================================
3659    // autoversion — argument parsing
3660    // =========================================================================
3661
3662    #[test]
3663    fn autoversion_args_empty() {
3664        let args: AutoversionArgs = syn::parse_str("").unwrap();
3665        assert!(args.self_type.is_none());
3666        assert!(args.tiers.is_none());
3667    }
3668
3669    #[test]
3670    fn autoversion_args_single_tier() {
3671        let args: AutoversionArgs = syn::parse_str("v3").unwrap();
3672        assert!(args.self_type.is_none());
3673        assert_eq!(args.tiers.as_ref().unwrap(), &["v3"]);
3674    }
3675
3676    #[test]
3677    fn autoversion_args_tiers_only() {
3678        let args: AutoversionArgs = syn::parse_str("v3, v4, neon").unwrap();
3679        assert!(args.self_type.is_none());
3680        let tiers = args.tiers.unwrap();
3681        assert_eq!(tiers, vec!["v3", "v4", "neon"]);
3682    }
3683
3684    #[test]
3685    fn autoversion_args_many_tiers() {
3686        let args: AutoversionArgs =
3687            syn::parse_str("v1, v2, v3, v4, v4x, neon, arm_v2, wasm128").unwrap();
3688        assert_eq!(
3689            args.tiers.unwrap(),
3690            vec!["v1", "v2", "v3", "v4", "v4x", "neon", "arm_v2", "wasm128"]
3691        );
3692    }
3693
3694    #[test]
3695    fn autoversion_args_trailing_comma() {
3696        let args: AutoversionArgs = syn::parse_str("v3, v4,").unwrap();
3697        assert_eq!(args.tiers.as_ref().unwrap(), &["v3", "v4"]);
3698    }
3699
3700    #[test]
3701    fn autoversion_args_self_only() {
3702        let args: AutoversionArgs = syn::parse_str("_self = MyType").unwrap();
3703        assert!(args.self_type.is_some());
3704        assert!(args.tiers.is_none());
3705    }
3706
3707    #[test]
3708    fn autoversion_args_self_and_tiers() {
3709        let args: AutoversionArgs = syn::parse_str("_self = MyType, v3, neon").unwrap();
3710        assert!(args.self_type.is_some());
3711        let tiers = args.tiers.unwrap();
3712        assert_eq!(tiers, vec!["v3", "neon"]);
3713    }
3714
3715    #[test]
3716    fn autoversion_args_tiers_then_self() {
3717        // _self can appear after tier names
3718        let args: AutoversionArgs = syn::parse_str("v3, neon, _self = MyType").unwrap();
3719        assert!(args.self_type.is_some());
3720        let tiers = args.tiers.unwrap();
3721        assert_eq!(tiers, vec!["v3", "neon"]);
3722    }
3723
3724    #[test]
3725    fn autoversion_args_self_with_path_type() {
3726        let args: AutoversionArgs = syn::parse_str("_self = crate::MyType").unwrap();
3727        assert!(args.self_type.is_some());
3728        assert!(args.tiers.is_none());
3729    }
3730
3731    #[test]
3732    fn autoversion_args_self_with_generic_type() {
3733        let args: AutoversionArgs = syn::parse_str("_self = Vec<u8>").unwrap();
3734        assert!(args.self_type.is_some());
3735        let ty_str = args.self_type.unwrap().to_token_stream().to_string();
3736        assert!(ty_str.contains("Vec"), "Expected Vec<u8>, got: {}", ty_str);
3737    }
3738
3739    #[test]
3740    fn autoversion_args_self_trailing_comma() {
3741        let args: AutoversionArgs = syn::parse_str("_self = MyType,").unwrap();
3742        assert!(args.self_type.is_some());
3743        assert!(args.tiers.is_none());
3744    }
3745
3746    // =========================================================================
3747    // autoversion — find_simd_token_param
3748    // =========================================================================
3749
3750    #[test]
3751    fn find_simd_token_param_first_position() {
3752        let f: ItemFn =
3753            syn::parse_str("fn process(token: SimdToken, data: &[f32]) -> f32 {}").unwrap();
3754        let param = find_simd_token_param(&f.sig).unwrap();
3755        assert_eq!(param.index, 0);
3756        assert_eq!(param.ident, "token");
3757    }
3758
3759    #[test]
3760    fn find_simd_token_param_second_position() {
3761        let f: ItemFn =
3762            syn::parse_str("fn process(data: &[f32], token: SimdToken) -> f32 {}").unwrap();
3763        let param = find_simd_token_param(&f.sig).unwrap();
3764        assert_eq!(param.index, 1);
3765        assert_eq!(param.ident, "token");
3766    }
3767
3768    #[test]
3769    fn find_simd_token_param_underscore_prefix() {
3770        let f: ItemFn =
3771            syn::parse_str("fn process(_token: SimdToken, data: &[f32]) -> f32 {}").unwrap();
3772        let param = find_simd_token_param(&f.sig).unwrap();
3773        assert_eq!(param.index, 0);
3774        assert_eq!(param.ident, "_token");
3775    }
3776
3777    #[test]
3778    fn find_simd_token_param_wildcard() {
3779        let f: ItemFn = syn::parse_str("fn process(_: SimdToken, data: &[f32]) -> f32 {}").unwrap();
3780        let param = find_simd_token_param(&f.sig).unwrap();
3781        assert_eq!(param.index, 0);
3782        assert_eq!(param.ident, "__autoversion_token");
3783    }
3784
3785    #[test]
3786    fn find_simd_token_param_not_found() {
3787        let f: ItemFn = syn::parse_str("fn process(data: &[f32]) -> f32 {}").unwrap();
3788        assert!(find_simd_token_param(&f.sig).is_none());
3789    }
3790
3791    #[test]
3792    fn find_simd_token_param_no_params() {
3793        let f: ItemFn = syn::parse_str("fn process() {}").unwrap();
3794        assert!(find_simd_token_param(&f.sig).is_none());
3795    }
3796
3797    #[test]
3798    fn find_simd_token_param_concrete_token_not_matched() {
3799        // autoversion looks specifically for SimdToken, not concrete tokens
3800        let f: ItemFn =
3801            syn::parse_str("fn process(token: X64V3Token, data: &[f32]) -> f32 {}").unwrap();
3802        assert!(find_simd_token_param(&f.sig).is_none());
3803    }
3804
3805    #[test]
3806    fn find_simd_token_param_scalar_token_not_matched() {
3807        let f: ItemFn =
3808            syn::parse_str("fn process(token: ScalarToken, data: &[f32]) -> f32 {}").unwrap();
3809        assert!(find_simd_token_param(&f.sig).is_none());
3810    }
3811
3812    #[test]
3813    fn find_simd_token_param_among_many() {
3814        let f: ItemFn = syn::parse_str(
3815            "fn process(a: i32, b: f64, token: SimdToken, c: &str, d: bool) -> f32 {}",
3816        )
3817        .unwrap();
3818        let param = find_simd_token_param(&f.sig).unwrap();
3819        assert_eq!(param.index, 2);
3820        assert_eq!(param.ident, "token");
3821    }
3822
3823    #[test]
3824    fn find_simd_token_param_with_generics() {
3825        let f: ItemFn =
3826            syn::parse_str("fn process<T: Clone>(token: SimdToken, data: &[T]) -> T {}").unwrap();
3827        let param = find_simd_token_param(&f.sig).unwrap();
3828        assert_eq!(param.index, 0);
3829        assert_eq!(param.ident, "token");
3830    }
3831
3832    #[test]
3833    fn find_simd_token_param_with_where_clause() {
3834        let f: ItemFn = syn::parse_str(
3835            "fn process<T>(token: SimdToken, data: &[T]) -> T where T: Copy + Default {}",
3836        )
3837        .unwrap();
3838        let param = find_simd_token_param(&f.sig).unwrap();
3839        assert_eq!(param.index, 0);
3840    }
3841
3842    #[test]
3843    fn find_simd_token_param_with_lifetime() {
3844        let f: ItemFn =
3845            syn::parse_str("fn process<'a>(token: SimdToken, data: &'a [f32]) -> &'a f32 {}")
3846                .unwrap();
3847        let param = find_simd_token_param(&f.sig).unwrap();
3848        assert_eq!(param.index, 0);
3849    }
3850
3851    // =========================================================================
3852    // autoversion — tier resolution
3853    // =========================================================================
3854
3855    #[test]
3856    fn autoversion_default_tiers_all_resolve() {
3857        let names: Vec<String> = DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect();
3858        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site(), false).unwrap();
3859        assert!(!tiers.is_empty());
3860        // scalar should be present
3861        assert!(tiers.iter().any(|t| t.name == "scalar"));
3862    }
3863
3864    #[test]
3865    fn autoversion_scalar_always_appended() {
3866        let names = vec!["v3".to_string(), "neon".to_string()];
3867        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site(), false).unwrap();
3868        assert!(
3869            tiers.iter().any(|t| t.name == "scalar"),
3870            "scalar must be auto-appended"
3871        );
3872    }
3873
3874    #[test]
3875    fn autoversion_scalar_not_duplicated() {
3876        let names = vec!["v3".to_string(), "scalar".to_string()];
3877        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site(), false).unwrap();
3878        let scalar_count = tiers.iter().filter(|t| t.name == "scalar").count();
3879        assert_eq!(scalar_count, 1, "scalar must not be duplicated");
3880    }
3881
3882    #[test]
3883    fn autoversion_tiers_sorted_by_priority() {
3884        let names = vec!["neon".to_string(), "v4".to_string(), "v3".to_string()];
3885        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site(), false).unwrap();
3886        // v4 (priority 40) > v3 (30) > neon (20) > scalar (0)
3887        let priorities: Vec<u32> = tiers.iter().map(|t| t.priority).collect();
3888        for window in priorities.windows(2) {
3889            assert!(
3890                    window[0] >= window[1],
3891            cfg_feature: None,
3892                    "Tiers not sorted by priority: {:?}",
3893                    priorities
3894                );
3895        }
3896    }
3897
3898    #[test]
3899    fn autoversion_unknown_tier_errors() {
3900        let names = vec!["v3".to_string(), "avx9000".to_string()];
3901        let result = resolve_tiers(&names, proc_macro2::Span::call_site(), false);
3902        match result {
3903            Ok(_) => panic!("Expected error for unknown tier 'avx9000'"),
3904            Err(e) => {
3905                let err_msg = e.to_string();
3906                assert!(
3907                    err_msg.contains("avx9000"),
3908                    "Error should mention unknown tier: {}",
3909                    err_msg
3910                );
3911            }
3912        }
3913    }
3914
3915    #[test]
3916    fn autoversion_all_known_tiers_resolve() {
3917        // Every tier in ALL_TIERS should be findable
3918        for tier in ALL_TIERS {
3919            assert!(
3920                find_tier(tier.name).is_some(),
3921                "Tier '{}' should be findable by name",
3922                tier.name
3923            );
3924        }
3925    }
3926
3927    #[test]
3928    fn autoversion_default_tier_list_is_sensible() {
3929        // Defaults should cover x86, ARM, WASM, and scalar
3930        let names: Vec<String> = DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect();
3931        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site(), false).unwrap();
3932
3933        let has_x86 = tiers.iter().any(|t| t.target_arch == Some("x86_64"));
3934        let has_arm = tiers.iter().any(|t| t.target_arch == Some("aarch64"));
3935        let has_wasm = tiers.iter().any(|t| t.target_arch == Some("wasm32"));
3936        let has_scalar = tiers.iter().any(|t| t.name == "scalar");
3937
3938        assert!(has_x86, "Default tiers should include an x86_64 tier");
3939        assert!(has_arm, "Default tiers should include an aarch64 tier");
3940        assert!(has_wasm, "Default tiers should include a wasm32 tier");
3941        assert!(has_scalar, "Default tiers should include scalar");
3942    }
3943
3944    // =========================================================================
3945    // autoversion — variant replacement (AST manipulation)
3946    // =========================================================================
3947
3948    /// Mirrors what `autoversion_impl` does for a single variant: parse an
3949    /// ItemFn (for test convenience), rename it, swap the SimdToken param
3950    /// type, optionally inject the `_self` preamble for scalar+self.
3951    fn do_variant_replacement(func: &str, tier_name: &str, has_self: bool) -> ItemFn {
3952        let mut f: ItemFn = syn::parse_str(func).unwrap();
3953        let fn_name = f.sig.ident.to_string();
3954
3955        let tier = find_tier(tier_name).unwrap();
3956
3957        // Rename
3958        f.sig.ident = format_ident!("{}_{}", fn_name, tier.suffix);
3959
3960        // Find and replace SimdToken param type
3961        let token_idx = find_simd_token_param(&f.sig)
3962            .unwrap_or_else(|| panic!("No SimdToken param in: {}", func))
3963            .index;
3964        let concrete_type: Type = syn::parse_str(tier.token_path).unwrap();
3965        if let FnArg::Typed(pt) = &mut f.sig.inputs[token_idx] {
3966            *pt.ty = concrete_type;
3967        }
3968
3969        // Scalar + self: inject preamble
3970        if tier_name == "scalar" && has_self {
3971            let preamble: syn::Stmt = syn::parse_quote!(let _self = self;);
3972            f.block.stmts.insert(0, preamble);
3973        }
3974
3975        f
3976    }
3977
3978    #[test]
3979    fn variant_replacement_v3_renames_function() {
3980        let f = do_variant_replacement(
3981            "fn process(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3982            "v3",
3983            false,
3984        );
3985        assert_eq!(f.sig.ident, "process_v3");
3986    }
3987
3988    #[test]
3989    fn variant_replacement_v3_replaces_token_type() {
3990        let f = do_variant_replacement(
3991            "fn process(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
3992            "v3",
3993            false,
3994        );
3995        let first_param_ty = match &f.sig.inputs[0] {
3996            FnArg::Typed(pt) => pt.ty.to_token_stream().to_string(),
3997            _ => panic!("Expected typed param"),
3998        };
3999        assert!(
4000            first_param_ty.contains("X64V3Token"),
4001            "Expected X64V3Token, got: {}",
4002            first_param_ty
4003        );
4004    }
4005
4006    #[test]
4007    fn variant_replacement_neon_produces_valid_fn() {
4008        let f = do_variant_replacement(
4009            "fn compute(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
4010            "neon",
4011            false,
4012        );
4013        assert_eq!(f.sig.ident, "compute_neon");
4014        let first_param_ty = match &f.sig.inputs[0] {
4015            FnArg::Typed(pt) => pt.ty.to_token_stream().to_string(),
4016            _ => panic!("Expected typed param"),
4017        };
4018        assert!(
4019            first_param_ty.contains("NeonToken"),
4020            "Expected NeonToken, got: {}",
4021            first_param_ty
4022        );
4023    }
4024
4025    #[test]
4026    fn variant_replacement_wasm128_produces_valid_fn() {
4027        let f = do_variant_replacement(
4028            "fn compute(_t: SimdToken, data: &[f32]) -> f32 { 0.0 }",
4029            "wasm128",
4030            false,
4031        );
4032        assert_eq!(f.sig.ident, "compute_wasm128");
4033    }
4034
4035    #[test]
4036    fn variant_replacement_scalar_produces_valid_fn() {
4037        let f = do_variant_replacement(
4038            "fn compute(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
4039            "scalar",
4040            false,
4041        );
4042        assert_eq!(f.sig.ident, "compute_scalar");
4043        let first_param_ty = match &f.sig.inputs[0] {
4044            FnArg::Typed(pt) => pt.ty.to_token_stream().to_string(),
4045            _ => panic!("Expected typed param"),
4046        };
4047        assert!(
4048            first_param_ty.contains("ScalarToken"),
4049            "Expected ScalarToken, got: {}",
4050            first_param_ty
4051        );
4052    }
4053
4054    #[test]
4055    fn variant_replacement_v4_produces_valid_fn() {
4056        let f = do_variant_replacement(
4057            "fn transform(token: SimdToken, data: &mut [f32]) { }",
4058            "v4",
4059            false,
4060        );
4061        assert_eq!(f.sig.ident, "transform_v4");
4062        let first_param_ty = match &f.sig.inputs[0] {
4063            FnArg::Typed(pt) => pt.ty.to_token_stream().to_string(),
4064            _ => panic!("Expected typed param"),
4065        };
4066        assert!(
4067            first_param_ty.contains("X64V4Token"),
4068            "Expected X64V4Token, got: {}",
4069            first_param_ty
4070        );
4071    }
4072
4073    #[test]
4074    fn variant_replacement_v4x_produces_valid_fn() {
4075        let f = do_variant_replacement(
4076            "fn transform(token: SimdToken, data: &mut [f32]) { }",
4077            "v4x",
4078            false,
4079        );
4080        assert_eq!(f.sig.ident, "transform_v4x");
4081    }
4082
4083    #[test]
4084    fn variant_replacement_arm_v2_produces_valid_fn() {
4085        let f = do_variant_replacement(
4086            "fn transform(token: SimdToken, data: &mut [f32]) { }",
4087            "arm_v2",
4088            false,
4089        );
4090        assert_eq!(f.sig.ident, "transform_arm_v2");
4091    }
4092
4093    #[test]
4094    fn variant_replacement_preserves_generics() {
4095        let f = do_variant_replacement(
4096            "fn process<T: Copy + Default>(token: SimdToken, data: &[T]) -> T { T::default() }",
4097            "v3",
4098            false,
4099        );
4100        assert_eq!(f.sig.ident, "process_v3");
4101        // Generic params should still be present
4102        assert!(
4103            !f.sig.generics.params.is_empty(),
4104            "Generics should be preserved"
4105        );
4106    }
4107
4108    #[test]
4109    fn variant_replacement_preserves_where_clause() {
4110        let f = do_variant_replacement(
4111            "fn process<T>(token: SimdToken, data: &[T]) -> T where T: Copy + Default { T::default() }",
4112            "v3",
4113            false,
4114        );
4115        assert!(
4116            f.sig.generics.where_clause.is_some(),
4117            "Where clause should be preserved"
4118        );
4119    }
4120
4121    #[test]
4122    fn variant_replacement_preserves_return_type() {
4123        let f = do_variant_replacement(
4124            "fn process(token: SimdToken, data: &[f32]) -> Vec<f32> { vec![] }",
4125            "neon",
4126            false,
4127        );
4128        let ret = f.sig.output.to_token_stream().to_string();
4129        assert!(
4130            ret.contains("Vec"),
4131            "Return type should be preserved, got: {}",
4132            ret
4133        );
4134    }
4135
4136    #[test]
4137    fn variant_replacement_preserves_multiple_params() {
4138        let f = do_variant_replacement(
4139            "fn process(token: SimdToken, a: &[f32], b: &[f32], scale: f32) -> f32 { 0.0 }",
4140            "v3",
4141            false,
4142        );
4143        // SimdToken → X64V3Token, plus the 3 other params
4144        assert_eq!(f.sig.inputs.len(), 4);
4145    }
4146
4147    #[test]
4148    fn variant_replacement_preserves_no_return_type() {
4149        let f = do_variant_replacement(
4150            "fn transform(token: SimdToken, data: &mut [f32]) { }",
4151            "v3",
4152            false,
4153        );
4154        assert!(
4155            matches!(f.sig.output, ReturnType::Default),
4156            "No return type should remain as Default"
4157        );
4158    }
4159
4160    #[test]
4161    fn variant_replacement_preserves_lifetime_params() {
4162        let f = do_variant_replacement(
4163            "fn process<'a>(token: SimdToken, data: &'a [f32]) -> &'a [f32] { data }",
4164            "v3",
4165            false,
4166        );
4167        assert!(!f.sig.generics.params.is_empty());
4168    }
4169
4170    #[test]
4171    fn variant_replacement_scalar_self_injects_preamble() {
4172        let f = do_variant_replacement(
4173            "fn method(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
4174            "scalar",
4175            true, // has_self
4176        );
4177        assert_eq!(f.sig.ident, "method_scalar");
4178
4179        // First statement should be `let _self = self;`
4180        let body_str = f.block.to_token_stream().to_string();
4181        assert!(
4182            body_str.contains("let _self = self"),
4183            "Scalar+self variant should have _self preamble, got: {}",
4184            body_str
4185        );
4186    }
4187
4188    #[test]
4189    fn variant_replacement_all_default_tiers_produce_valid_fns() {
4190        let names: Vec<String> = DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect();
4191        let tiers = resolve_tiers(&names, proc_macro2::Span::call_site(), false).unwrap();
4192
4193        for tier in &tiers {
4194            let f = do_variant_replacement(
4195                "fn process(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
4196                tier.name,
4197                false,
4198            );
4199            let expected_name = format!("process_{}", tier.suffix);
4200            assert_eq!(
4201                f.sig.ident.to_string(),
4202                expected_name,
4203                "Tier '{}' should produce function '{}'",
4204                tier.name,
4205                expected_name
4206            );
4207        }
4208    }
4209
4210    #[test]
4211    fn variant_replacement_all_known_tiers_produce_valid_fns() {
4212        for tier in ALL_TIERS {
4213            let f = do_variant_replacement(
4214                "fn compute(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
4215                tier.name,
4216                false,
4217            );
4218            let expected_name = format!("compute_{}", tier.suffix);
4219            assert_eq!(
4220                f.sig.ident.to_string(),
4221                expected_name,
4222                "Tier '{}' should produce function '{}'",
4223                tier.name,
4224                expected_name
4225            );
4226        }
4227    }
4228
4229    #[test]
4230    fn variant_replacement_no_simdtoken_remains() {
4231        for tier in ALL_TIERS {
4232            let f = do_variant_replacement(
4233                "fn compute(token: SimdToken, data: &[f32]) -> f32 { 0.0 }",
4234                tier.name,
4235                false,
4236            );
4237            let full_str = f.to_token_stream().to_string();
4238            assert!(
4239                !full_str.contains("SimdToken"),
4240                "Tier '{}' variant still contains 'SimdToken': {}",
4241                tier.name,
4242                full_str
4243            );
4244        }
4245    }
4246
4247    // =========================================================================
4248    // autoversion — cfg guard and tier descriptor properties
4249    // =========================================================================
4250
4251    #[test]
4252    fn tier_v3_targets_x86_64() {
4253        let tier = find_tier("v3").unwrap();
4254        assert_eq!(tier.target_arch, Some("x86_64"));
4255    }
4256
4257    #[test]
4258    fn tier_v4_targets_x86_64() {
4259        let tier = find_tier("v4").unwrap();
4260        assert_eq!(tier.target_arch, Some("x86_64"));
4261    }
4262
4263    #[test]
4264    fn tier_v4x_targets_x86_64() {
4265        let tier = find_tier("v4x").unwrap();
4266        assert_eq!(tier.target_arch, Some("x86_64"));
4267    }
4268
4269    #[test]
4270    fn tier_neon_targets_aarch64() {
4271        let tier = find_tier("neon").unwrap();
4272        assert_eq!(tier.target_arch, Some("aarch64"));
4273    }
4274
4275    #[test]
4276    fn tier_wasm128_targets_wasm32() {
4277        let tier = find_tier("wasm128").unwrap();
4278        assert_eq!(tier.target_arch, Some("wasm32"));
4279    }
4280
4281    #[test]
4282    fn tier_scalar_has_no_guards() {
4283        let tier = find_tier("scalar").unwrap();
4284        assert_eq!(tier.target_arch, None);
4285        assert_eq!(tier.priority, 0);
4286    }
4287
4288    #[test]
4289    fn tier_priorities_are_consistent() {
4290        // Higher-capability tiers within the same arch should have higher priority
4291        let v2 = find_tier("v2").unwrap();
4292        let v3 = find_tier("v3").unwrap();
4293        let v4 = find_tier("v4").unwrap();
4294        assert!(v4.priority > v3.priority);
4295        assert!(v3.priority > v2.priority);
4296
4297        let neon = find_tier("neon").unwrap();
4298        let arm_v2 = find_tier("arm_v2").unwrap();
4299        let arm_v3 = find_tier("arm_v3").unwrap();
4300        assert!(arm_v3.priority > arm_v2.priority);
4301        assert!(arm_v2.priority > neon.priority);
4302
4303        // scalar is lowest
4304        let scalar = find_tier("scalar").unwrap();
4305        assert!(neon.priority > scalar.priority);
4306        assert!(v2.priority > scalar.priority);
4307    }
4308
4309    // =========================================================================
4310    // autoversion — dispatcher structure
4311    // =========================================================================
4312
4313    #[test]
4314    fn dispatcher_param_removal_free_fn() {
4315        // Simulate what autoversion_impl does: remove the SimdToken param
4316        let f: ItemFn =
4317            syn::parse_str("fn process(token: SimdToken, data: &[f32], scale: f32) -> f32 { 0.0 }")
4318                .unwrap();
4319
4320        let token_param = find_simd_token_param(&f.sig).unwrap();
4321        let mut dispatcher_inputs: Vec<FnArg> = f.sig.inputs.iter().cloned().collect();
4322        dispatcher_inputs.remove(token_param.index);
4323
4324        // Should have 2 params remaining: data, scale
4325        assert_eq!(dispatcher_inputs.len(), 2);
4326
4327        // Neither should be SimdToken
4328        for arg in &dispatcher_inputs {
4329            if let FnArg::Typed(pt) = arg {
4330                let ty_str = pt.ty.to_token_stream().to_string();
4331                assert!(
4332                    !ty_str.contains("SimdToken"),
4333                    "SimdToken should be removed from dispatcher, found: {}",
4334                    ty_str
4335                );
4336            }
4337        }
4338    }
4339
4340    #[test]
4341    fn dispatcher_param_removal_token_only() {
4342        let f: ItemFn = syn::parse_str("fn process(token: SimdToken) -> f32 { 0.0 }").unwrap();
4343
4344        let token_param = find_simd_token_param(&f.sig).unwrap();
4345        let mut dispatcher_inputs: Vec<FnArg> = f.sig.inputs.iter().cloned().collect();
4346        dispatcher_inputs.remove(token_param.index);
4347
4348        // No params left — dispatcher takes no arguments
4349        assert_eq!(dispatcher_inputs.len(), 0);
4350    }
4351
4352    #[test]
4353    fn dispatcher_param_removal_token_last() {
4354        let f: ItemFn =
4355            syn::parse_str("fn process(data: &[f32], scale: f32, token: SimdToken) -> f32 { 0.0 }")
4356                .unwrap();
4357
4358        let token_param = find_simd_token_param(&f.sig).unwrap();
4359        assert_eq!(token_param.index, 2);
4360
4361        let mut dispatcher_inputs: Vec<FnArg> = f.sig.inputs.iter().cloned().collect();
4362        dispatcher_inputs.remove(token_param.index);
4363
4364        assert_eq!(dispatcher_inputs.len(), 2);
4365    }
4366
4367    #[test]
4368    fn dispatcher_dispatch_args_extraction() {
4369        // Test that we correctly extract idents for the dispatch call
4370        let f: ItemFn =
4371            syn::parse_str("fn process(data: &[f32], scale: f32) -> f32 { 0.0 }").unwrap();
4372
4373        let dispatch_args: Vec<String> = f
4374            .sig
4375            .inputs
4376            .iter()
4377            .filter_map(|arg| {
4378                if let FnArg::Typed(PatType { pat, .. }) = arg {
4379                    if let syn::Pat::Ident(pi) = pat.as_ref() {
4380                        return Some(pi.ident.to_string());
4381                    }
4382                }
4383                None
4384            })
4385            .collect();
4386
4387        assert_eq!(dispatch_args, vec!["data", "scale"]);
4388    }
4389
4390    #[test]
4391    fn dispatcher_wildcard_params_get_renamed() {
4392        let f: ItemFn = syn::parse_str("fn process(_: &[f32], _: f32) -> f32 { 0.0 }").unwrap();
4393
4394        let mut dispatcher_inputs: Vec<FnArg> = f.sig.inputs.iter().cloned().collect();
4395
4396        let mut wild_counter = 0u32;
4397        for arg in &mut dispatcher_inputs {
4398            if let FnArg::Typed(pat_type) = arg {
4399                if matches!(pat_type.pat.as_ref(), syn::Pat::Wild(_)) {
4400                    let ident = format_ident!("__autoversion_wild_{}", wild_counter);
4401                    wild_counter += 1;
4402                    *pat_type.pat = syn::Pat::Ident(syn::PatIdent {
4403                        attrs: vec![],
4404                        by_ref: None,
4405                        mutability: None,
4406                        ident,
4407                        subpat: None,
4408                    });
4409                }
4410            }
4411        }
4412
4413        // Both wildcards should be renamed
4414        assert_eq!(wild_counter, 2);
4415
4416        let names: Vec<String> = dispatcher_inputs
4417            .iter()
4418            .filter_map(|arg| {
4419                if let FnArg::Typed(PatType { pat, .. }) = arg {
4420                    if let syn::Pat::Ident(pi) = pat.as_ref() {
4421                        return Some(pi.ident.to_string());
4422                    }
4423                }
4424                None
4425            })
4426            .collect();
4427
4428        assert_eq!(names, vec!["__autoversion_wild_0", "__autoversion_wild_1"]);
4429    }
4430
4431    // =========================================================================
4432    // autoversion — suffix_path (reused in dispatch)
4433    // =========================================================================
4434
4435    #[test]
4436    fn suffix_path_simple() {
4437        let path: syn::Path = syn::parse_str("process").unwrap();
4438        let suffixed = suffix_path(&path, "v3");
4439        assert_eq!(suffixed.to_token_stream().to_string(), "process_v3");
4440    }
4441
4442    #[test]
4443    fn suffix_path_qualified() {
4444        let path: syn::Path = syn::parse_str("module::process").unwrap();
4445        let suffixed = suffix_path(&path, "neon");
4446        let s = suffixed.to_token_stream().to_string();
4447        assert!(
4448            s.contains("process_neon"),
4449            "Expected process_neon, got: {}",
4450            s
4451        );
4452    }
4453}