archmage_macros/
lib.rs

1//! Proc-macros for archmage SIMD capability tokens.
2//!
//! Provides the `#[arcane]` attribute (with a legacy `#[simd_fn]` alias) to make raw
//! intrinsics safe via token proof.
5
6use proc_macro::TokenStream;
7use quote::{format_ident, quote, ToTokens};
8use syn::{
9    fold::Fold,
10    parse::{Parse, ParseStream},
11    parse_macro_input, parse_quote, Attribute, FnArg, GenericParam, Ident, ItemFn, PatType,
12    ReturnType, Signature, Token, Type, TypeParamBound,
13};
14
/// A [`Fold`] implementation that replaces the bare `Self` type with a concrete type.
///
/// `Self` cannot be named inside a nested function, so code that is moved into one
/// needs its `Self` types rewritten to the type given here.
struct ReplaceSelf<'a> {
    /// The type that is cloned in place of each bare `Self`.
    replacement: &'a Type,
}
19
20impl Fold for ReplaceSelf<'_> {
21    fn fold_type(&mut self, ty: Type) -> Type {
22        match ty {
23            Type::Path(ref type_path) if type_path.qself.is_none() => {
24                // Check if it's just `Self`
25                if type_path.path.is_ident("Self") {
26                    return self.replacement.clone();
27                }
28                // Otherwise continue folding
29                syn::fold::fold_type(self, ty)
30            }
31            _ => syn::fold::fold_type(self, ty),
32        }
33    }
34}
35
/// Arguments to the `#[arcane]` macro.
///
/// Both fields start out unset (`false` / `None`) via `Default` and are filled
/// in by the `Parse` implementation as arguments are encountered.
#[derive(Default)]
struct ArcaneArgs {
    /// Use `#[inline(always)]` instead of `#[inline]` for the inner function.
    /// Requires nightly Rust with `#![feature(target_feature_inline_always)]`.
    inline_always: bool,
    /// The concrete type to use for `self` receiver.
    /// When specified, `self`/`&self`/`&mut self` is transformed to `_self: Type`/`&Type`/`&mut Type`.
    /// Set by the `_self = Type` argument.
    self_type: Option<Type>,
}
46
47impl Parse for ArcaneArgs {
48    fn parse(input: ParseStream) -> syn::Result<Self> {
49        let mut args = ArcaneArgs::default();
50
51        while !input.is_empty() {
52            let ident: Ident = input.parse()?;
53            match ident.to_string().as_str() {
54                "inline_always" => args.inline_always = true,
55                "_self" => {
56                    let _: Token![=] = input.parse()?;
57                    args.self_type = Some(input.parse()?);
58                }
59                other => {
60                    return Err(syn::Error::new(
61                        ident.span(),
62                        format!("unknown arcane argument: `{}`", other),
63                    ))
64                }
65            }
66            // Consume optional comma
67            if input.peek(Token![,]) {
68                let _: Token![,] = input.parse()?;
69            }
70        }
71
72        Ok(args)
73    }
74}
75
76// Token-to-features and trait-to-features mappings are generated from
77// token-registry.toml by xtask. Regenerate with: cargo run -p xtask -- generate
78mod generated;
79use generated::{token_to_arch, token_to_features, trait_to_features};
80
/// Result of extracting token info from a type.
///
/// Every variant carries identifier names as plain strings (the last path
/// segment of the type or trait), not the full syntax nodes.
enum TokenTypeInfo {
    /// Concrete token type (e.g., `Avx2Token`) whose name is known to `token_to_features`.
    Concrete(String),
    /// impl Trait with the trait names (e.g., `impl HasX64V2`)
    ImplTrait(Vec<String>),
    /// Generic type parameter name (e.g., `T`); also used for any other path
    /// type that is not a known concrete token.
    Generic(String),
}
90
91/// Extract token type information from a type.
92fn extract_token_type_info(ty: &Type) -> Option<TokenTypeInfo> {
93    match ty {
94        Type::Path(type_path) => {
95            // Get the last segment of the path (e.g., "Avx2Token" from "archmage::Avx2Token")
96            type_path.path.segments.last().map(|seg| {
97                let name = seg.ident.to_string();
98                // Check if it's a known concrete token type
99                if token_to_features(&name).is_some() {
100                    TokenTypeInfo::Concrete(name)
101                } else {
102                    // Might be a generic type parameter like `T`
103                    TokenTypeInfo::Generic(name)
104                }
105            })
106        }
107        Type::Reference(type_ref) => {
108            // Handle &Token or &mut Token
109            extract_token_type_info(&type_ref.elem)
110        }
111        Type::ImplTrait(impl_trait) => {
112            // Handle `impl HasX64V2` or `impl HasX64V2 + HasNeon`
113            let traits: Vec<String> = extract_trait_names_from_bounds(&impl_trait.bounds);
114            if traits.is_empty() {
115                None
116            } else {
117                Some(TokenTypeInfo::ImplTrait(traits))
118            }
119        }
120        _ => None,
121    }
122}
123
124/// Extract trait names from type param bounds.
125fn extract_trait_names_from_bounds(
126    bounds: &syn::punctuated::Punctuated<TypeParamBound, Token![+]>,
127) -> Vec<String> {
128    bounds
129        .iter()
130        .filter_map(|bound| {
131            if let TypeParamBound::Trait(trait_bound) = bound {
132                trait_bound
133                    .path
134                    .segments
135                    .last()
136                    .map(|seg| seg.ident.to_string())
137            } else {
138                None
139            }
140        })
141        .collect()
142}
143
144/// Look up a generic type parameter in the function's generics.
145fn find_generic_bounds(sig: &Signature, type_name: &str) -> Option<Vec<String>> {
146    // Check inline bounds first (e.g., `fn foo<T: HasX64V2>(token: T)`)
147    for param in &sig.generics.params {
148        if let GenericParam::Type(type_param) = param {
149            if type_param.ident == type_name {
150                let traits = extract_trait_names_from_bounds(&type_param.bounds);
151                if !traits.is_empty() {
152                    return Some(traits);
153                }
154            }
155        }
156    }
157
158    // Check where clause (e.g., `fn foo<T>(token: T) where T: HasX64V2`)
159    if let Some(where_clause) = &sig.generics.where_clause {
160        for predicate in &where_clause.predicates {
161            if let syn::WherePredicate::Type(pred_type) = predicate {
162                if let Type::Path(type_path) = &pred_type.bounded_ty {
163                    if let Some(seg) = type_path.path.segments.last() {
164                        if seg.ident == type_name {
165                            let traits = extract_trait_names_from_bounds(&pred_type.bounds);
166                            if !traits.is_empty() {
167                                return Some(traits);
168                            }
169                        }
170                    }
171                }
172            }
173        }
174    }
175
176    None
177}
178
179/// Convert trait names to features, collecting all features from all traits.
180fn traits_to_features(trait_names: &[String]) -> Option<Vec<&'static str>> {
181    let mut all_features = Vec::new();
182
183    for trait_name in trait_names {
184        if let Some(features) = trait_to_features(trait_name) {
185            for &feature in features {
186                if !all_features.contains(&feature) {
187                    all_features.push(feature);
188                }
189            }
190        }
191    }
192
193    if all_features.is_empty() {
194        None
195    } else {
196        Some(all_features)
197    }
198}
199
200/// Find the first token parameter and return its name, features, and target arch.
201///
202/// Returns `(param_ident, features, target_arch)` where:
203/// - `param_ident`: the parameter identifier
204/// - `features`: the target features to enable
205/// - `target_arch`: the target architecture (Some for concrete tokens, None for traits/generics)
206fn find_token_param(sig: &Signature) -> Option<(Ident, Vec<&'static str>, Option<&'static str>)> {
207    for arg in &sig.inputs {
208        match arg {
209            FnArg::Receiver(_) => {
210                // Self receivers (self, &self, &mut self) are not yet supported.
211                // The macro creates an inner function, and Rust's inner functions
212                // cannot have `self` parameters. Supporting this would require
213                // AST rewriting to replace `self` with a regular parameter.
214                // See the module docs for the workaround.
215                continue;
216            }
217            FnArg::Typed(PatType { pat, ty, .. }) => {
218                if let Some(info) = extract_token_type_info(ty) {
219                    let (features, arch) = match info {
220                        TokenTypeInfo::Concrete(ref name) => {
221                            let features = token_to_features(name).map(|f| f.to_vec());
222                            let arch = token_to_arch(name);
223                            (features, arch)
224                        }
225                        TokenTypeInfo::ImplTrait(trait_names) => {
226                            (traits_to_features(&trait_names), None)
227                        }
228                        TokenTypeInfo::Generic(type_name) => {
229                            // Look up the generic parameter's bounds
230                            let features = find_generic_bounds(sig, &type_name)
231                                .and_then(|traits| traits_to_features(&traits));
232                            (features, None)
233                        }
234                    };
235
236                    if let Some(features) = features {
237                        // Extract parameter name
238                        if let syn::Pat::Ident(pat_ident) = pat.as_ref() {
239                            return Some((pat_ident.ident.clone(), features, arch));
240                        }
241                    }
242                }
243            }
244        }
245    }
246    None
247}
248
/// Represents the kind of self receiver, which determines the type of the
/// `_self` parameter it is transformed into.
enum SelfReceiver {
    /// `self` (by value/move) — becomes `_self: Type`
    Owned,
    /// `&self` (shared reference) — becomes `_self: &Type`
    Ref,
    /// `&mut self` (mutable reference) — becomes `_self: &mut Type`
    RefMut,
}
258
259/// Shared implementation for arcane/arcane macros.
260fn arcane_impl(input_fn: ItemFn, macro_name: &str, args: ArcaneArgs) -> TokenStream {
261    // Check for self receiver
262    let has_self_receiver = input_fn
263        .sig
264        .inputs
265        .first()
266        .map(|arg| matches!(arg, FnArg::Receiver(_)))
267        .unwrap_or(false);
268
269    // If there's a self receiver, we need _self = Type
270    if has_self_receiver && args.self_type.is_none() {
271        let msg = format!(
272            "{} with self receiver requires `_self = Type` argument.\n\
273             Example: #[{}(_self = MyType)]\n\
274             Use `_self` (not `self`) in the function body to refer to self.",
275            macro_name, macro_name
276        );
277        return syn::Error::new_spanned(&input_fn.sig, msg)
278            .to_compile_error()
279            .into();
280    }
281
282    // Find the token parameter, its features, and target arch
283    let (_token_ident, features, target_arch) = match find_token_param(&input_fn.sig) {
284        Some(result) => result,
285        None => {
286            let msg = format!(
287                "{} requires a token parameter. Supported forms:\n\
288                 - Concrete: `token: X64V3Token`\n\
289                 - impl Trait: `token: impl Has256BitSimd`\n\
290                 - Generic: `fn foo<T: HasX64V2>(token: T, ...)`\n\
291                 - With self: `#[{}(_self = Type)] fn method(&self, token: impl HasNeon, ...)`",
292                macro_name, macro_name
293            );
294            return syn::Error::new_spanned(&input_fn.sig, msg)
295                .to_compile_error()
296                .into();
297        }
298    };
299
300    // Build target_feature attributes
301    let target_feature_attrs: Vec<Attribute> = features
302        .iter()
303        .map(|feature| parse_quote!(#[target_feature(enable = #feature)]))
304        .collect();
305
306    // Extract function components
307    let vis = &input_fn.vis;
308    let sig = &input_fn.sig;
309    let fn_name = &sig.ident;
310    let generics = &sig.generics;
311    let where_clause = &generics.where_clause;
312    let inputs = &sig.inputs;
313    let output = &sig.output;
314    let body = &input_fn.block;
315    let attrs = &input_fn.attrs;
316
317    // Determine self receiver type if present
318    let self_receiver_kind: Option<SelfReceiver> = inputs.first().and_then(|arg| match arg {
319        FnArg::Receiver(receiver) => {
320            if receiver.reference.is_none() {
321                Some(SelfReceiver::Owned)
322            } else if receiver.mutability.is_some() {
323                Some(SelfReceiver::RefMut)
324            } else {
325                Some(SelfReceiver::Ref)
326            }
327        }
328        _ => None,
329    });
330
331    // Build inner function parameters, transforming self if needed
332    let inner_params: Vec<proc_macro2::TokenStream> = inputs
333        .iter()
334        .map(|arg| match arg {
335            FnArg::Receiver(_) => {
336                // Transform self receiver to _self parameter
337                let self_ty = args.self_type.as_ref().unwrap();
338                match self_receiver_kind.as_ref().unwrap() {
339                    SelfReceiver::Owned => quote!(_self: #self_ty),
340                    SelfReceiver::Ref => quote!(_self: &#self_ty),
341                    SelfReceiver::RefMut => quote!(_self: &mut #self_ty),
342                }
343            }
344            FnArg::Typed(pat_type) => quote!(#pat_type),
345        })
346        .collect();
347
348    // Build inner function call arguments
349    let inner_args: Vec<proc_macro2::TokenStream> = inputs
350        .iter()
351        .filter_map(|arg| match arg {
352            FnArg::Typed(pat_type) => {
353                if let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref() {
354                    let ident = &pat_ident.ident;
355                    Some(quote!(#ident))
356                } else {
357                    None
358                }
359            }
360            FnArg::Receiver(_) => Some(quote!(self)), // Pass self to inner as _self
361        })
362        .collect();
363
364    let inner_fn_name = format_ident!("__simd_inner_{}", fn_name);
365
366    // Choose inline attribute based on args
367    // Note: #[inline(always)] + #[target_feature] requires nightly with
368    // #![feature(target_feature_inline_always)]
369    let inline_attr: Attribute = if args.inline_always {
370        parse_quote!(#[inline(always)])
371    } else {
372        parse_quote!(#[inline])
373    };
374
375    // Transform output and body to replace Self with concrete type if needed
376    let (inner_output, inner_body): (ReturnType, syn::Block) =
377        if let Some(ref self_ty) = args.self_type {
378            let mut replacer = ReplaceSelf {
379                replacement: self_ty,
380            };
381            let transformed_output = replacer.fold_return_type(output.clone());
382            let transformed_body = replacer.fold_block((**body).clone());
383            (transformed_output, transformed_body)
384        } else {
385            (output.clone(), (**body).clone())
386        };
387
388    // Generate the expanded function
389    // If we know the target arch (concrete token), generate cfg-gated real impl + stub
390    let expanded = if let Some(arch) = target_arch {
391        quote! {
392            // Real implementation for the correct architecture
393            #[cfg(target_arch = #arch)]
394            #(#attrs)*
395            #vis #sig {
396                #(#target_feature_attrs)*
397                #inline_attr
398                fn #inner_fn_name #generics (#(#inner_params),*) #inner_output #where_clause
399                #inner_body
400
401                // SAFETY: The token parameter proves the required CPU features are available.
402                // Calling a #[target_feature] function from a non-matching context requires
403                // unsafe because the CPU may not support those instructions. The token's
404                // existence proves summon() succeeded, so the features are available.
405                unsafe { #inner_fn_name(#(#inner_args),*) }
406            }
407
408            // Stub for other architectures - the token cannot be obtained, so this is unreachable
409            #[cfg(not(target_arch = #arch))]
410            #(#attrs)*
411            #vis #sig {
412                // This token type cannot be summoned on this architecture.
413                // If you're seeing this at runtime, there's a bug in your dispatch logic.
414                let _ = (#(#inner_args),*); // suppress unused warnings
415                unreachable!(
416                    concat!(
417                        "Called ",
418                        stringify!(#fn_name),
419                        " with a token that cannot exist on this architecture. ",
420                        "This token requires target_arch = \"",
421                        #arch,
422                        "\"."
423                    )
424                )
425            }
426        }
427    } else {
428        // No specific arch (trait bounds or generic) - generate without cfg guards
429        quote! {
430            #(#attrs)*
431            #vis #sig {
432                #(#target_feature_attrs)*
433                #inline_attr
434                fn #inner_fn_name #generics (#(#inner_params),*) #inner_output #where_clause
435                #inner_body
436
437                // SAFETY: Calling a #[target_feature] function from a non-matching context
438                // requires unsafe. The token proves the required CPU features are available.
439                unsafe { #inner_fn_name(#(#inner_args),*) }
440            }
441        }
442    };
443
444    expanded.into()
445}
446
447/// Mark a function as an arcane SIMD function.
448///
449/// This macro enables safe use of SIMD intrinsics by generating an inner function
450/// with the appropriate `#[target_feature(enable = "...")]` attributes based on
451/// the token parameter type. The outer function calls the inner function unsafely,
452/// which is justified because the token parameter proves the features are available.
453///
454/// **The token is passed through to the inner function**, so you can call other
455/// token-taking functions from inside `#[arcane]`.
456///
457/// # Token Parameter Forms
458///
459/// The macro supports four forms of token parameters:
460///
461/// ## Concrete Token Types
462///
463/// ```ignore
464/// #[arcane]
465/// fn process(token: Avx2Token, data: &[f32; 8]) -> [f32; 8] {
466///     // AVX2 intrinsics safe here
467/// }
468/// ```
469///
470/// ## impl Trait Bounds
471///
472/// ```ignore
473/// #[arcane]
474/// fn process(token: impl HasX64V2, data: &[f32; 8]) -> [f32; 8] {
475///     // Accepts any token with x86-64-v2 features (SSE4.2+)
476/// }
477/// ```
478///
479/// ## Generic Type Parameters
480///
481/// ```ignore
482/// #[arcane]
483/// fn process<T: HasX64V2>(token: T, data: &[f32; 8]) -> [f32; 8] {
484///     // Generic over any v2-capable token
485/// }
486///
487/// // Also works with where clauses:
488/// #[arcane]
489/// fn process<T>(token: T, data: &[f32; 8]) -> [f32; 8]
490/// where
491///     T: HasX64V2
492/// {
493///     // ...
494/// }
495/// ```
496///
497/// ## Methods with Self Receivers
498///
499/// Methods with `self`, `&self`, `&mut self` receivers are supported via the
500/// `_self = Type` argument. Use `_self` in the function body instead of `self`:
501///
502/// ```ignore
503/// use archmage::{X64V3Token, arcane};
504/// use wide::f32x8;
505///
506/// trait SimdOps {
507///     fn double(&self, token: X64V3Token) -> Self;
508///     fn square(self, token: X64V3Token) -> Self;
509///     fn scale(&mut self, token: X64V3Token, factor: f32);
510/// }
511///
512/// impl SimdOps for f32x8 {
513///     #[arcane(_self = f32x8)]
514///     fn double(&self, _token: X64V3Token) -> Self {
515///         // Use _self instead of self in the body
516///         *_self + *_self
517///     }
518///
519///     #[arcane(_self = f32x8)]
520///     fn square(self, _token: X64V3Token) -> Self {
521///         _self * _self
522///     }
523///
524///     #[arcane(_self = f32x8)]
525///     fn scale(&mut self, _token: X64V3Token, factor: f32) {
526///         *_self = *_self * f32x8::splat(factor);
527///     }
528/// }
529/// ```
530///
531/// **Why `_self`?** The macro generates an inner function where `self` becomes
532/// a regular parameter named `_self`. Using `_self` in your code reminds you
533/// that you're not using the normal `self` keyword.
534///
535/// **All receiver types are supported:**
536/// - `self` (by value/move) → `_self: Type`
537/// - `&self` (shared reference) → `_self: &Type`
538/// - `&mut self` (mutable reference) → `_self: &mut Type`
539///
540/// # Multiple Trait Bounds
541///
542/// When using `impl Trait` or generic bounds with multiple traits,
543/// all required features are enabled:
544///
545/// ```ignore
546/// #[arcane]
547/// fn fma_kernel(token: impl HasX64V2 + HasNeon, data: &[f32; 8]) -> [f32; 8] {
548///     // Cross-platform: SSE4.2 on x86, NEON on ARM
549/// }
550/// ```
551///
552/// # Expansion
553///
554/// The macro expands to approximately:
555///
556/// ```ignore
557/// fn process(token: Avx2Token, data: &[f32; 8]) -> [f32; 8] {
558///     #[target_feature(enable = "avx2")]
559///     #[inline]
560///     fn __simd_inner_process(token: Avx2Token, data: &[f32; 8]) -> [f32; 8] {
561///         let v = unsafe { _mm256_loadu_ps(data.as_ptr()) };
562///         let doubled = _mm256_add_ps(v, v);
563///         let mut out = [0.0f32; 8];
564///         unsafe { _mm256_storeu_ps(out.as_mut_ptr(), doubled) };
565///         out
566///     }
567///     // SAFETY: Calling #[target_feature] fn from non-matching context.
568///     // Token proves the required features are available.
569///     unsafe { __simd_inner_process(token, data) }
570/// }
571/// ```
572///
573/// # Profile Tokens
574///
575/// Profile tokens automatically enable all required features:
576///
577/// ```ignore
578/// #[arcane]
579/// fn kernel(token: X64V3Token, data: &mut [f32]) {
580///     // AVX2 + FMA + BMI1 + BMI2 intrinsics all safe here!
581/// }
582/// ```
583///
584/// # Supported Tokens
585///
586/// - **x86_64 tiers**: `X64V2Token`, `X64V3Token` / `Desktop64` / `Avx2FmaToken`,
587///   `X64V4Token` / `Avx512Token` / `Server64`, `Avx512ModernToken`, `Avx512Fp16Token`
588/// - **ARM**: `NeonToken` / `Arm64`, `NeonAesToken`, `NeonSha3Token`, `NeonCrcToken`
589/// - **WASM**: `Wasm128Token`
590///
591/// # Supported Trait Bounds
592///
593/// - **x86_64 tiers**: `HasX64V2`, `HasX64V4`
594/// - **ARM**: `HasNeon`, `HasNeonAes`, `HasNeonSha3`
595///
596/// **Preferred:** Use concrete tokens (`X64V3Token`, `Desktop64`, `NeonToken`) directly.
597/// Concrete token types also work as trait bounds (e.g., `impl X64V3Token`).
598///
599/// # Options
600///
601/// ## `inline_always`
602///
603/// Use `#[inline(always)]` instead of `#[inline]` for the inner function.
604/// This can improve performance by ensuring aggressive inlining, but requires
605/// nightly Rust with `#![feature(target_feature_inline_always)]` enabled in
606/// the crate using the macro.
607///
608/// ```ignore
609/// #![feature(target_feature_inline_always)]
610///
611/// #[arcane(inline_always)]
612/// fn fast_kernel(token: Avx2Token, data: &mut [f32]) {
613///     // Inner function will use #[inline(always)]
614/// }
615/// ```
#[proc_macro_attribute]
pub fn arcane(attr: TokenStream, item: TokenStream) -> TokenStream {
    // Attribute arguments (`inline_always`, `_self = Type`). On a parse
    // failure `parse_macro_input!` returns the error as compile-error tokens.
    let args = parse_macro_input!(attr as ArcaneArgs);
    // The annotated item must parse as a function.
    let input_fn = parse_macro_input!(item as ItemFn);
    // The name passed here is only used in diagnostics.
    arcane_impl(input_fn, "arcane", args)
}
622
/// Legacy alias for [`arcane`].
///
/// **Deprecated:** Use `#[arcane]` instead. This alias exists only for migration.
///
/// Accepts the same arguments as `#[arcane]` and shares its implementation;
/// diagnostics name the macro as `simd_fn`.
#[proc_macro_attribute]
#[doc(hidden)]
pub fn simd_fn(attr: TokenStream, item: TokenStream) -> TokenStream {
    // Same argument grammar as `#[arcane]`.
    let args = parse_macro_input!(attr as ArcaneArgs);
    let input_fn = parse_macro_input!(item as ItemFn);
    // The name passed here is only used in diagnostics.
    arcane_impl(input_fn, "simd_fn", args)
}
633
634// ============================================================================
635// Rite macro for inner SIMD functions (no wrapper overhead)
636// ============================================================================
637
638/// Annotate inner SIMD helpers called from `#[arcane]` functions.
639///
640/// Unlike `#[arcane]`, which creates a wrapper function, `#[rite]` simply adds
641/// `#[target_feature]` and `#[inline]` attributes. This allows the function to
642/// inline directly into calling `#[arcane]` functions without optimization barriers.
643///
644/// # When to Use
645///
646/// Use `#[rite]` for helper functions that are **only** called from within
647/// `#[arcane]` functions with matching or superset token types:
648///
649/// ```ignore
650/// use archmage::{arcane, rite, X64V3Token};
651///
652/// #[arcane]
653/// fn outer(token: X64V3Token, data: &[f32; 8]) -> f32 {
654///     // helper inlines directly - no wrapper overhead
655///     helper(token, data) * 2.0
656/// }
657///
658/// #[rite]
659/// fn helper(token: X64V3Token, data: &[f32; 8]) -> f32 {
660///     // Just has #[target_feature(enable = "avx2,fma,...")]
661///     // Called from #[arcane] context, so features are guaranteed
662///     let v = f32x8::from_array(token, *data);
663///     v.reduce_add()
664/// }
665/// ```
666///
667/// # Safety
668///
669/// `#[rite]` functions can only be safely called from contexts where the
670/// required CPU features are enabled:
671/// - From within `#[arcane]` functions with matching/superset tokens
672/// - From within other `#[rite]` functions with matching/superset tokens
673/// - From code compiled with `-Ctarget-cpu` that enables the features
674///
675/// Calling from other contexts requires `unsafe` and the caller must ensure
676/// the CPU supports the required features.
677///
678/// # Comparison with #[arcane]
679///
680/// | Aspect | `#[arcane]` | `#[rite]` |
681/// |--------|-------------|-----------|
682/// | Creates wrapper | Yes | No |
683/// | Entry point | Yes | No |
684/// | Inlines into caller | No (barrier) | Yes |
685/// | Safe to call anywhere | Yes (with token) | Only from feature-enabled context |
#[proc_macro_attribute]
pub fn rite(attr: TokenStream, item: TokenStream) -> TokenStream {
    // `#[rite]` accepts no arguments; parsing `RiteArgs` rejects a non-empty
    // argument list with an explanatory error.
    let args = parse_macro_input!(attr as RiteArgs);
    // The annotated item must parse as a function.
    let input_fn = parse_macro_input!(item as ItemFn);
    rite_impl(input_fn, args)
}
693
/// Arguments for the `#[rite]` macro.
///
/// Currently empty - `#[inline(always)]` is not supported because
/// `#[inline(always)]` + `#[target_feature]` requires nightly Rust.
/// The regular `#[inline]` hint is sufficient when called from
/// matching `#[target_feature]` contexts.
///
/// The type is kept so options can be added later without changing the
/// signatures that take it.
#[derive(Default)]
struct RiteArgs {
    // No options currently - inline_always doesn't work on stable
}
704
705impl Parse for RiteArgs {
706    fn parse(input: ParseStream) -> syn::Result<Self> {
707        if !input.is_empty() {
708            let ident: Ident = input.parse()?;
709            return Err(syn::Error::new(
710                ident.span(),
711                "#[rite] takes no arguments. Note: inline_always is not supported \
712                 because #[inline(always)] + #[target_feature] requires nightly Rust.",
713            ));
714        }
715        Ok(RiteArgs::default())
716    }
717}
718
719/// Implementation for the `#[rite]` macro.
720fn rite_impl(mut input_fn: ItemFn, args: RiteArgs) -> TokenStream {
721    // Find the token parameter and its features
722    let (_, features, target_arch) = match find_token_param(&input_fn.sig) {
723        Some(result) => result,
724        None => {
725            let msg = "rite requires a token parameter. Supported forms:\n\
726                 - Concrete: `token: X64V3Token`\n\
727                 - impl Trait: `token: impl HasX64V2`\n\
728                 - Generic: `fn foo<T: HasX64V2>(token: T, ...)`";
729            return syn::Error::new_spanned(&input_fn.sig, msg)
730                .to_compile_error()
731                .into();
732        }
733    };
734
735    // Build target_feature attributes
736    let target_feature_attrs: Vec<Attribute> = features
737        .iter()
738        .map(|feature| parse_quote!(#[target_feature(enable = #feature)]))
739        .collect();
740
741    // Always use #[inline] - #[inline(always)] + #[target_feature] requires nightly
742    let _ = args; // RiteArgs is currently empty but kept for future extensibility
743    let inline_attr: Attribute = parse_quote!(#[inline]);
744
745    // Prepend attributes to the function
746    let mut new_attrs = target_feature_attrs;
747    new_attrs.push(inline_attr);
748    new_attrs.append(&mut input_fn.attrs);
749    input_fn.attrs = new_attrs;
750
751    // If we know the target arch, generate cfg-gated impl + stub
752    if let Some(arch) = target_arch {
753        let vis = &input_fn.vis;
754        let sig = &input_fn.sig;
755        let attrs = &input_fn.attrs;
756        let block = &input_fn.block;
757
758        quote! {
759            #[cfg(target_arch = #arch)]
760            #(#attrs)*
761            #vis #sig
762            #block
763
764            #[cfg(not(target_arch = #arch))]
765            #vis #sig {
766                unreachable!(concat!(
767                    "This function requires ",
768                    #arch,
769                    " architecture"
770                ))
771            }
772        }
773        .into()
774    } else {
775        // No specific arch (trait bounds) - just emit the annotated function
776        quote!(#input_fn).into()
777    }
778}
779
780// =============================================================================
781// magetypes! macro - generate platform variants from generic function
782// =============================================================================
783
/// Configuration for a magetypes variant
///
/// One entry describes one generated copy of the annotated function.
struct MagetypesVariant {
    /// Variant suffix (e.g. `"v3"`); presumably appended to the function name
    /// as `_<suffix>` — confirm against `magetypes_impl`.
    suffix: &'static str,
    /// Path of the concrete token type, written as source text
    /// (e.g. `"archmage::X64V3Token"`).
    token_type: &'static str,
    /// Architecture the variant is limited to; `None` means always available.
    target_arch: Option<&'static str>,
    /// Cargo feature associated with the variant, if any; presumably an extra
    /// `cfg` requirement — confirm against `magetypes_impl`.
    cargo_feature: Option<&'static str>,
}
791
/// The fixed set of variants: x86_64 V3 and V4, aarch64 NEON, wasm32 SIMD128,
/// and an architecture-independent scalar fallback.
const MAGETYPES_VARIANTS: &[MagetypesVariant] = &[
    // x86_64 V3 (AVX2)
    MagetypesVariant {
        suffix: "v3",
        token_type: "archmage::X64V3Token",
        target_arch: Some("x86_64"),
        cargo_feature: None,
    },
    // x86_64 V4 (AVX-512); the only variant tied to a cargo feature
    MagetypesVariant {
        suffix: "v4",
        token_type: "archmage::X64V4Token",
        target_arch: Some("x86_64"),
        cargo_feature: Some("avx512"),
    },
    // aarch64 NEON
    MagetypesVariant {
        suffix: "neon",
        token_type: "archmage::NeonToken",
        target_arch: Some("aarch64"),
        cargo_feature: None,
    },
    // wasm32 SIMD128
    MagetypesVariant {
        suffix: "wasm128",
        token_type: "archmage::Wasm128Token",
        target_arch: Some("wasm32"),
        cargo_feature: None,
    },
    // Scalar fallback
    MagetypesVariant {
        suffix: "scalar",
        token_type: "archmage::ScalarToken",
        target_arch: None, // Always available
        cargo_feature: None,
    },
];
829
830/// Generate platform-specific variants from a function by replacing `Token`.
831///
832/// Use `Token` as a placeholder for the token type. The macro generates
833/// suffixed variants (`_v3`, `_v4`, `_neon`, `_wasm128`, `_scalar`) with
834/// `Token` replaced by the concrete token type, and each variant wrapped
835/// in the appropriate `#[cfg(target_arch = ...)]` guard.
836///
837/// # What gets replaced
838///
839/// **Only `Token`** is replaced — with the concrete token type for each variant
840/// (e.g., `archmage::X64V3Token`, `archmage::ScalarToken`). SIMD types like
841/// `f32x8` and constants like `LANES` are **not** replaced by this macro.
842///
843/// This means `#[magetypes]` works well for functions that only need the token
844/// (e.g., to pass to other functions), but not for functions that use
845/// platform-specific SIMD types directly. For those, write `_v3` and `_scalar`
846/// variants manually and use `incant!` for dispatch.
847///
848/// # Example
849///
850/// ```rust,ignore
851/// use archmage::magetypes;
852///
853/// // Works: function only uses Token, no SIMD types
854/// #[magetypes]
855/// fn process(token: Token, data: &[f32]) -> f32 {
856///     // delegates to other functions that handle SIMD internally
857///     inner_simd_work(token, data)
858/// }
859///
860/// // Generates:
861/// // - process_v3(token: X64V3Token, ...) — #[cfg(target_arch = "x86_64")]
862/// // - process_v4(token: X64V4Token, ...) — #[cfg(target_arch = "x86_64", feature = "avx512")]
863/// // - process_neon(token: NeonToken, ...) — #[cfg(target_arch = "aarch64")]
864/// // - process_wasm128(token: Wasm128Token, ...) — #[cfg(target_arch = "wasm32")]
865/// // - process_scalar(token: ScalarToken, ...) — always available
866/// ```
867///
868/// # Usage with incant!
869///
870/// The generated variants work with `incant!` for dispatch:
871///
872/// ```rust,ignore
873/// pub fn process_api(data: &[f32]) -> f32 {
874///     incant!(process(data))
875/// }
876/// ```
877#[proc_macro_attribute]
878pub fn magetypes(attr: TokenStream, item: TokenStream) -> TokenStream {
879    // Ignore attributes for now (could add variant selection later)
880    let _ = attr;
881    let input_fn = parse_macro_input!(item as ItemFn);
882    magetypes_impl(input_fn)
883}
884
885fn magetypes_impl(input_fn: ItemFn) -> TokenStream {
886    let fn_name = &input_fn.sig.ident;
887    let fn_attrs = &input_fn.attrs;
888
889    // Convert function to string for text substitution
890    let fn_str = input_fn.to_token_stream().to_string();
891
892    let mut variants = Vec::new();
893
894    for variant in MAGETYPES_VARIANTS {
895        // Create suffixed function name
896        let suffixed_name = format!("{}_{}", fn_name, variant.suffix);
897
898        // Do text substitution
899        let mut variant_str = fn_str.clone();
900
901        // Replace function name
902        variant_str = variant_str.replacen(&fn_name.to_string(), &suffixed_name, 1);
903
904        // Replace Token type with concrete token
905        variant_str = variant_str.replace("Token", variant.token_type);
906
907        // Parse back to tokens
908        let variant_tokens: proc_macro2::TokenStream = match variant_str.parse() {
909            Ok(t) => t,
910            Err(e) => {
911                return syn::Error::new_spanned(
912                    &input_fn,
913                    format!(
914                        "Failed to parse generated variant `{}`: {}",
915                        suffixed_name, e
916                    ),
917                )
918                .to_compile_error()
919                .into();
920            }
921        };
922
923        // Add cfg guards
924        let cfg_guard = match (variant.target_arch, variant.cargo_feature) {
925            (Some(arch), Some(feature)) => {
926                quote! { #[cfg(all(target_arch = #arch, feature = #feature))] }
927            }
928            (Some(arch), None) => {
929                quote! { #[cfg(target_arch = #arch)] }
930            }
931            (None, Some(feature)) => {
932                quote! { #[cfg(feature = #feature)] }
933            }
934            (None, None) => {
935                quote! {} // No guard needed (scalar)
936            }
937        };
938
939        variants.push(quote! {
940            #cfg_guard
941            #variant_tokens
942        });
943    }
944
945    // Remove attributes from the list that should not be duplicated
946    let filtered_attrs: Vec<_> = fn_attrs
947        .iter()
948        .filter(|a| !a.path().is_ident("magetypes"))
949        .collect();
950
951    let output = quote! {
952        #(#filtered_attrs)*
953        #(#variants)*
954    };
955
956    output.into()
957}
958
959// =============================================================================
960// incant! macro - dispatch to platform-specific variants
961// =============================================================================
962
963/// Input for the incant! macro
964struct IncantInput {
965    /// Function name to call
966    func_name: Ident,
967    /// Arguments to pass
968    args: Vec<syn::Expr>,
969    /// Optional token variable for passthrough mode
970    with_token: Option<syn::Expr>,
971}
972
973impl Parse for IncantInput {
974    fn parse(input: ParseStream) -> syn::Result<Self> {
975        // Parse: function_name(arg1, arg2, ...) [with token_expr]
976        let func_name: Ident = input.parse()?;
977
978        // Parse parenthesized arguments
979        let content;
980        syn::parenthesized!(content in input);
981        let args = content
982            .parse_terminated(syn::Expr::parse, Token![,])?
983            .into_iter()
984            .collect();
985
986        // Check for optional "with token"
987        let with_token = if input.peek(Ident) {
988            let kw: Ident = input.parse()?;
989            if kw != "with" {
990                return Err(syn::Error::new_spanned(kw, "expected `with` keyword"));
991            }
992            Some(input.parse()?)
993        } else {
994            None
995        };
996
997        Ok(IncantInput {
998            func_name,
999            args,
1000            with_token,
1001        })
1002    }
1003}
1004
1005/// Dispatch to platform-specific SIMD variants.
1006///
1007/// # Entry Point Mode (no token yet)
1008///
1009/// Summons tokens and dispatches to the best available variant:
1010///
1011/// ```rust,ignore
1012/// pub fn public_api(data: &[f32]) -> f32 {
1013///     incant!(dot(data))
1014/// }
1015/// ```
1016///
1017/// Expands to runtime feature detection + dispatch to `dot_v3`, `dot_v4`,
1018/// `dot_neon`, `dot_wasm128`, or `dot_scalar`.
1019///
1020/// # Passthrough Mode (already have token)
1021///
1022/// Uses compile-time dispatch via `IntoConcreteToken`:
1023///
1024/// ```rust,ignore
1025/// #[arcane]
1026/// fn outer(token: X64V3Token, data: &[f32]) -> f32 {
1027///     incant!(inner(data) with token)
1028/// }
1029/// ```
1030///
1031/// The compiler monomorphizes the dispatch, eliminating non-matching branches.
1032///
1033/// # Variant Naming
1034///
1035/// Functions must have suffixed variants:
1036/// - `_v3` for `X64V3Token`
1037/// - `_v4` for `X64V4Token` (requires `avx512` feature)
1038/// - `_neon` for `NeonToken`
1039/// - `_wasm128` for `Wasm128Token`
1040/// - `_scalar` for `ScalarToken`
1041#[proc_macro]
1042pub fn incant(input: TokenStream) -> TokenStream {
1043    let input = parse_macro_input!(input as IncantInput);
1044    incant_impl(input)
1045}
1046
1047/// Legacy alias for [`incant!`].
1048#[proc_macro]
1049pub fn simd_route(input: TokenStream) -> TokenStream {
1050    let input = parse_macro_input!(input as IncantInput);
1051    incant_impl(input)
1052}
1053
1054fn incant_impl(input: IncantInput) -> TokenStream {
1055    let func_name = &input.func_name;
1056    let args = &input.args;
1057
1058    // Create suffixed function names
1059    let fn_v3 = format_ident!("{}_v3", func_name);
1060    let fn_v4 = format_ident!("{}_v4", func_name);
1061    let fn_neon = format_ident!("{}_neon", func_name);
1062    let fn_wasm128 = format_ident!("{}_wasm128", func_name);
1063    let fn_scalar = format_ident!("{}_scalar", func_name);
1064
1065    // Use labeled blocks instead of `return` so incant! can be chained.
1066    // Labeled blocks are stable since Rust 1.65.
1067    if let Some(token_expr) = &input.with_token {
1068        // Passthrough mode: use IntoConcreteToken for compile-time dispatch
1069        let expanded = quote! {
1070            '__incant: {
1071                use archmage::IntoConcreteToken;
1072                let __incant_token = #token_expr;
1073
1074                #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
1075                {
1076                    #[cfg(feature = "avx512")]
1077                    if let Some(__t) = __incant_token.as_x64v4() {
1078                        break '__incant #fn_v4(__t, #(#args),*);
1079                    }
1080                    if let Some(__t) = __incant_token.as_x64v3() {
1081                        break '__incant #fn_v3(__t, #(#args),*);
1082                    }
1083                }
1084
1085                #[cfg(target_arch = "aarch64")]
1086                if let Some(__t) = __incant_token.as_neon() {
1087                    break '__incant #fn_neon(__t, #(#args),*);
1088                }
1089
1090                #[cfg(target_arch = "wasm32")]
1091                if let Some(__t) = __incant_token.as_wasm128() {
1092                    break '__incant #fn_wasm128(__t, #(#args),*);
1093                }
1094
1095                if let Some(__t) = __incant_token.as_scalar() {
1096                    break '__incant #fn_scalar(__t, #(#args),*);
1097                }
1098
1099                unreachable!("Token did not match any known variant")
1100            }
1101        };
1102        expanded.into()
1103    } else {
1104        // Entry point mode: summon tokens and dispatch
1105        let expanded = quote! {
1106            '__incant: {
1107                use archmage::SimdToken;
1108
1109                #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
1110                {
1111                    #[cfg(feature = "avx512")]
1112                    if let Some(__t) = archmage::X64V4Token::summon() {
1113                        break '__incant #fn_v4(__t, #(#args),*);
1114                    }
1115                    if let Some(__t) = archmage::X64V3Token::summon() {
1116                        break '__incant #fn_v3(__t, #(#args),*);
1117                    }
1118                }
1119
1120                #[cfg(target_arch = "aarch64")]
1121                if let Some(__t) = archmage::NeonToken::summon() {
1122                    break '__incant #fn_neon(__t, #(#args),*);
1123                }
1124
1125                #[cfg(target_arch = "wasm32")]
1126                if let Some(__t) = archmage::Wasm128Token::summon() {
1127                    break '__incant #fn_wasm128(__t, #(#args),*);
1128                }
1129
1130                // Scalar fallback
1131                #fn_scalar(archmage::ScalarToken, #(#args),*)
1132            }
1133        };
1134        expanded.into()
1135    }
1136}
1137
1138// =============================================================================
1139// Unit tests for token/trait recognition maps
1140// =============================================================================
1141
#[cfg(test)]
mod tests {
    use super::*;

    use super::generated::{ALL_CONCRETE_TOKENS, ALL_TRAIT_NAMES};

    /// Every token listed in `ALL_CONCRETE_TOKENS` must have a feature mapping.
    #[test]
    fn every_concrete_token_is_in_token_to_features() {
        for token in ALL_CONCRETE_TOKENS.iter().copied() {
            let features = token_to_features(token);
            assert!(
                features.is_some(),
                "Token `{}` exists in runtime crate but is NOT recognized by \
                 token_to_features() in the proc macro. Add it!",
                token
            );
        }
    }

    /// Every trait listed in `ALL_TRAIT_NAMES` must have a feature mapping.
    #[test]
    fn every_trait_is_in_trait_to_features() {
        for trait_name in ALL_TRAIT_NAMES.iter().copied() {
            let features = trait_to_features(trait_name);
            assert!(
                features.is_some(),
                "Trait `{}` exists in runtime crate but is NOT recognized by \
                 trait_to_features() in the proc macro. Add it!",
                trait_name
            );
        }
    }

    /// Alias names must resolve to the same features as the token they alias.
    #[test]
    fn token_aliases_map_to_same_features() {
        // (alias, canonical, failure message), checked in this order:
        //   Desktop64 = X64V3Token
        //   Server64 = X64V4Token = Avx512Token
        //   Arm64 = NeonToken
        let alias_pairs = [
            (
                "Desktop64",
                "X64V3Token",
                "Desktop64 and X64V3Token should map to identical features",
            ),
            (
                "Server64",
                "X64V4Token",
                "Server64 and X64V4Token should map to identical features",
            ),
            (
                "X64V4Token",
                "Avx512Token",
                "X64V4Token and Avx512Token should map to identical features",
            ),
            (
                "Arm64",
                "NeonToken",
                "Arm64 and NeonToken should map to identical features",
            ),
        ];

        for (left, right, message) in alias_pairs.iter().copied() {
            assert_eq!(
                token_to_features(left),
                token_to_features(right),
                "{}",
                message
            );
        }
    }

    /// Tier tokens are also accepted where a trait bound name is expected.
    #[test]
    fn trait_to_features_includes_tokens_as_bounds() {
        // `impl X64V3Token` is not valid Rust, but the macro inspects the AST
        // before type checking, so these names must still be recognized.
        let tier_tokens = [
            "X64V2Token",
            "X64V3Token",
            "Desktop64",
            "Avx2FmaToken",
            "X64V4Token",
            "Avx512Token",
            "Server64",
            "Avx512ModernToken",
            "Avx512Fp16Token",
            "NeonToken",
            "Arm64",
            "NeonAesToken",
            "NeonSha3Token",
            "NeonCrcToken",
        ];

        for token in tier_tokens.iter().copied() {
            let features = trait_to_features(token);
            assert!(
                features.is_some(),
                "Tier token `{}` should also be recognized in trait_to_features() \
                 for use as a generic bound. Add it!",
                token
            );
        }
    }

    /// `HasX64V4` must be a strict superset of `HasX64V2`.
    #[test]
    fn trait_features_are_cumulative() {
        let lower = trait_to_features("HasX64V2").unwrap();
        let higher = trait_to_features("HasX64V4").unwrap();

        // Every v2 feature is present in v4 ...
        for feature in lower.iter() {
            assert!(
                higher.contains(feature),
                "HasX64V4 should include v2 feature `{}` but doesn't",
                feature
            );
        }

        // ... and v4 adds at least one more.
        assert!(
            higher.len() > lower.len(),
            "HasX64V4 should have more features than HasX64V2"
        );
    }

    /// `X64V3Token` used as a bound must cover all `HasX64V2` features.
    #[test]
    fn x64v3_trait_features_include_v2() {
        let required = trait_to_features("HasX64V2").unwrap();
        let provided = trait_to_features("X64V3Token").unwrap();

        for feature in required.iter() {
            assert!(
                provided.contains(feature),
                "X64V3Token trait features should include v2 feature `{}` but don't",
                feature
            );
        }
    }

    /// `HasNeonAes` must cover all `HasNeon` features.
    #[test]
    fn has_neon_aes_includes_neon() {
        let base = trait_to_features("HasNeon").unwrap();
        let extended = trait_to_features("HasNeonAes").unwrap();

        for feature in base.iter() {
            assert!(
                extended.contains(feature),
                "HasNeonAes should include NEON feature `{}`",
                feature
            );
        }
    }

    /// Traits dropped in 0.3.0 must no longer be recognized.
    #[test]
    fn no_removed_traits_are_recognized() {
        let removed = [
            "HasSse",
            "HasSse2",
            "HasSse41",
            "HasSse42",
            "HasAvx",
            "HasAvx2",
            "HasFma",
            "HasAvx512f",
            "HasAvx512bw",
            "HasAvx512vl",
            "HasAvx512vbmi2",
            "HasSve",
            "HasSve2",
        ];

        for trait_name in removed.iter().copied() {
            let features = trait_to_features(trait_name);
            assert!(
                features.is_none(),
                "Removed trait `{}` should NOT be in trait_to_features(). \
                 It was removed in 0.3.0 — users should migrate to tier traits.",
                trait_name
            );
        }
    }

    /// Token names that do not exist must not be recognized.
    #[test]
    fn no_nonexistent_tokens_are_recognized() {
        let fake = [
            "Sse2Token",
            "SveToken",
            "Sve2Token",
            "Avx512VnniToken",
            "X64V4ModernToken",
            "NeonFp16Token",
        ];

        for token in fake.iter().copied() {
            let features = token_to_features(token);
            assert!(
                features.is_none(),
                "Non-existent token `{}` should NOT be in token_to_features()",
                token
            );
        }
    }
}
archmage_macros/lib.rs

archmage_macros/
lib.rs