archmage_macros/lib.rs
1//! Proc-macros for archmage SIMD capability tokens.
2//!
//! Provides the `#[arcane]` attribute (with the legacy `#[simd_fn]` alias) to make
//! raw intrinsics safe via token proof.
5
6use proc_macro::TokenStream;
7use quote::{format_ident, quote, ToTokens};
8use syn::{
9 fold::Fold,
10 parse::{Parse, ParseStream},
11 parse_macro_input, parse_quote, Attribute, FnArg, GenericParam, Ident, ItemFn, PatType,
12 ReturnType, Signature, Token, Type, TypeParamBound,
13};
14
15/// A Fold implementation that replaces `Self` with a concrete type.
16struct ReplaceSelf<'a> {
17 replacement: &'a Type,
18}
19
20impl Fold for ReplaceSelf<'_> {
21 fn fold_type(&mut self, ty: Type) -> Type {
22 match ty {
23 Type::Path(ref type_path) if type_path.qself.is_none() => {
24 // Check if it's just `Self`
25 if type_path.path.is_ident("Self") {
26 return self.replacement.clone();
27 }
28 // Otherwise continue folding
29 syn::fold::fold_type(self, ty)
30 }
31 _ => syn::fold::fold_type(self, ty),
32 }
33 }
34}
35
36/// Arguments to the `#[arcane]` macro.
37#[derive(Default)]
38struct ArcaneArgs {
39 /// Use `#[inline(always)]` instead of `#[inline]` for the inner function.
40 /// Requires nightly Rust with `#![feature(target_feature_inline_always)]`.
41 inline_always: bool,
42 /// The concrete type to use for `self` receiver.
43 /// When specified, `self`/`&self`/`&mut self` is transformed to `_self: Type`/`&Type`/`&mut Type`.
44 self_type: Option<Type>,
45}
46
47impl Parse for ArcaneArgs {
48 fn parse(input: ParseStream) -> syn::Result<Self> {
49 let mut args = ArcaneArgs::default();
50
51 while !input.is_empty() {
52 let ident: Ident = input.parse()?;
53 match ident.to_string().as_str() {
54 "inline_always" => args.inline_always = true,
55 "_self" => {
56 let _: Token![=] = input.parse()?;
57 args.self_type = Some(input.parse()?);
58 }
59 other => {
60 return Err(syn::Error::new(
61 ident.span(),
62 format!("unknown arcane argument: `{}`", other),
63 ))
64 }
65 }
66 // Consume optional comma
67 if input.peek(Token![,]) {
68 let _: Token![,] = input.parse()?;
69 }
70 }
71
72 Ok(args)
73 }
74}
75
76// Token-to-features and trait-to-features mappings are generated from
77// token-registry.toml by xtask. Regenerate with: cargo run -p xtask -- generate
78mod generated;
79use generated::{token_to_arch, token_to_features, trait_to_features};
80
/// Result of extracting token info from a type.
///
/// Produced by `extract_token_type_info`. Names are taken from the last path
/// segment only, so module qualifiers (e.g. `archmage::`) are ignored.
enum TokenTypeInfo {
    /// Concrete token type (e.g., `Avx2Token`), i.e. a name that
    /// `token_to_features` recognises.
    Concrete(String),
    /// impl Trait with the trait names (e.g., `impl HasX64V2`)
    ImplTrait(Vec<String>),
    /// Any other path type, treated as a generic type parameter name (e.g., `T`)
    /// whose bounds still have to be looked up.
    Generic(String),
}
90
91/// Extract token type information from a type.
92fn extract_token_type_info(ty: &Type) -> Option<TokenTypeInfo> {
93 match ty {
94 Type::Path(type_path) => {
95 // Get the last segment of the path (e.g., "Avx2Token" from "archmage::Avx2Token")
96 type_path.path.segments.last().map(|seg| {
97 let name = seg.ident.to_string();
98 // Check if it's a known concrete token type
99 if token_to_features(&name).is_some() {
100 TokenTypeInfo::Concrete(name)
101 } else {
102 // Might be a generic type parameter like `T`
103 TokenTypeInfo::Generic(name)
104 }
105 })
106 }
107 Type::Reference(type_ref) => {
108 // Handle &Token or &mut Token
109 extract_token_type_info(&type_ref.elem)
110 }
111 Type::ImplTrait(impl_trait) => {
112 // Handle `impl HasX64V2` or `impl HasX64V2 + HasNeon`
113 let traits: Vec<String> = extract_trait_names_from_bounds(&impl_trait.bounds);
114 if traits.is_empty() {
115 None
116 } else {
117 Some(TokenTypeInfo::ImplTrait(traits))
118 }
119 }
120 _ => None,
121 }
122}
123
124/// Extract trait names from type param bounds.
125fn extract_trait_names_from_bounds(
126 bounds: &syn::punctuated::Punctuated<TypeParamBound, Token![+]>,
127) -> Vec<String> {
128 bounds
129 .iter()
130 .filter_map(|bound| {
131 if let TypeParamBound::Trait(trait_bound) = bound {
132 trait_bound
133 .path
134 .segments
135 .last()
136 .map(|seg| seg.ident.to_string())
137 } else {
138 None
139 }
140 })
141 .collect()
142}
143
144/// Look up a generic type parameter in the function's generics.
145fn find_generic_bounds(sig: &Signature, type_name: &str) -> Option<Vec<String>> {
146 // Check inline bounds first (e.g., `fn foo<T: HasX64V2>(token: T)`)
147 for param in &sig.generics.params {
148 if let GenericParam::Type(type_param) = param {
149 if type_param.ident == type_name {
150 let traits = extract_trait_names_from_bounds(&type_param.bounds);
151 if !traits.is_empty() {
152 return Some(traits);
153 }
154 }
155 }
156 }
157
158 // Check where clause (e.g., `fn foo<T>(token: T) where T: HasX64V2`)
159 if let Some(where_clause) = &sig.generics.where_clause {
160 for predicate in &where_clause.predicates {
161 if let syn::WherePredicate::Type(pred_type) = predicate {
162 if let Type::Path(type_path) = &pred_type.bounded_ty {
163 if let Some(seg) = type_path.path.segments.last() {
164 if seg.ident == type_name {
165 let traits = extract_trait_names_from_bounds(&pred_type.bounds);
166 if !traits.is_empty() {
167 return Some(traits);
168 }
169 }
170 }
171 }
172 }
173 }
174 }
175
176 None
177}
178
179/// Convert trait names to features, collecting all features from all traits.
180fn traits_to_features(trait_names: &[String]) -> Option<Vec<&'static str>> {
181 let mut all_features = Vec::new();
182
183 for trait_name in trait_names {
184 if let Some(features) = trait_to_features(trait_name) {
185 for &feature in features {
186 if !all_features.contains(&feature) {
187 all_features.push(feature);
188 }
189 }
190 }
191 }
192
193 if all_features.is_empty() {
194 None
195 } else {
196 Some(all_features)
197 }
198}
199
200/// Find the first token parameter and return its name, features, and target arch.
201///
202/// Returns `(param_ident, features, target_arch)` where:
203/// - `param_ident`: the parameter identifier
204/// - `features`: the target features to enable
205/// - `target_arch`: the target architecture (Some for concrete tokens, None for traits/generics)
206fn find_token_param(sig: &Signature) -> Option<(Ident, Vec<&'static str>, Option<&'static str>)> {
207 for arg in &sig.inputs {
208 match arg {
209 FnArg::Receiver(_) => {
210 // Self receivers (self, &self, &mut self) are not yet supported.
211 // The macro creates an inner function, and Rust's inner functions
212 // cannot have `self` parameters. Supporting this would require
213 // AST rewriting to replace `self` with a regular parameter.
214 // See the module docs for the workaround.
215 continue;
216 }
217 FnArg::Typed(PatType { pat, ty, .. }) => {
218 if let Some(info) = extract_token_type_info(ty) {
219 let (features, arch) = match info {
220 TokenTypeInfo::Concrete(ref name) => {
221 let features = token_to_features(name).map(|f| f.to_vec());
222 let arch = token_to_arch(name);
223 (features, arch)
224 }
225 TokenTypeInfo::ImplTrait(trait_names) => {
226 (traits_to_features(&trait_names), None)
227 }
228 TokenTypeInfo::Generic(type_name) => {
229 // Look up the generic parameter's bounds
230 let features = find_generic_bounds(sig, &type_name)
231 .and_then(|traits| traits_to_features(&traits));
232 (features, None)
233 }
234 };
235
236 if let Some(features) = features {
237 // Extract parameter name
238 if let syn::Pat::Ident(pat_ident) = pat.as_ref() {
239 return Some((pat_ident.ident.clone(), features, arch));
240 }
241 }
242 }
243 }
244 }
245 }
246 None
247}
248
/// Represents the kind of self receiver and the transformed parameter.
///
/// `arcane_impl` uses this to decide whether the inner function's `_self`
/// parameter is typed `Type`, `&Type`, or `&mut Type`.
enum SelfReceiver {
    /// `self` (by value/move)
    Owned,
    /// `&self` (shared reference)
    Ref,
    /// `&mut self` (mutable reference)
    RefMut,
}
258
259/// Shared implementation for arcane/arcane macros.
260fn arcane_impl(input_fn: ItemFn, macro_name: &str, args: ArcaneArgs) -> TokenStream {
261 // Check for self receiver
262 let has_self_receiver = input_fn
263 .sig
264 .inputs
265 .first()
266 .map(|arg| matches!(arg, FnArg::Receiver(_)))
267 .unwrap_or(false);
268
269 // If there's a self receiver, we need _self = Type
270 if has_self_receiver && args.self_type.is_none() {
271 let msg = format!(
272 "{} with self receiver requires `_self = Type` argument.\n\
273 Example: #[{}(_self = MyType)]\n\
274 Use `_self` (not `self`) in the function body to refer to self.",
275 macro_name, macro_name
276 );
277 return syn::Error::new_spanned(&input_fn.sig, msg)
278 .to_compile_error()
279 .into();
280 }
281
282 // Find the token parameter, its features, and target arch
283 let (_token_ident, features, target_arch) = match find_token_param(&input_fn.sig) {
284 Some(result) => result,
285 None => {
286 let msg = format!(
287 "{} requires a token parameter. Supported forms:\n\
288 - Concrete: `token: X64V3Token`\n\
289 - impl Trait: `token: impl Has256BitSimd`\n\
290 - Generic: `fn foo<T: HasX64V2>(token: T, ...)`\n\
291 - With self: `#[{}(_self = Type)] fn method(&self, token: impl HasNeon, ...)`",
292 macro_name, macro_name
293 );
294 return syn::Error::new_spanned(&input_fn.sig, msg)
295 .to_compile_error()
296 .into();
297 }
298 };
299
300 // Build target_feature attributes
301 let target_feature_attrs: Vec<Attribute> = features
302 .iter()
303 .map(|feature| parse_quote!(#[target_feature(enable = #feature)]))
304 .collect();
305
306 // Extract function components
307 let vis = &input_fn.vis;
308 let sig = &input_fn.sig;
309 let fn_name = &sig.ident;
310 let generics = &sig.generics;
311 let where_clause = &generics.where_clause;
312 let inputs = &sig.inputs;
313 let output = &sig.output;
314 let body = &input_fn.block;
315 let attrs = &input_fn.attrs;
316
317 // Determine self receiver type if present
318 let self_receiver_kind: Option<SelfReceiver> = inputs.first().and_then(|arg| match arg {
319 FnArg::Receiver(receiver) => {
320 if receiver.reference.is_none() {
321 Some(SelfReceiver::Owned)
322 } else if receiver.mutability.is_some() {
323 Some(SelfReceiver::RefMut)
324 } else {
325 Some(SelfReceiver::Ref)
326 }
327 }
328 _ => None,
329 });
330
331 // Build inner function parameters, transforming self if needed
332 let inner_params: Vec<proc_macro2::TokenStream> = inputs
333 .iter()
334 .map(|arg| match arg {
335 FnArg::Receiver(_) => {
336 // Transform self receiver to _self parameter
337 let self_ty = args.self_type.as_ref().unwrap();
338 match self_receiver_kind.as_ref().unwrap() {
339 SelfReceiver::Owned => quote!(_self: #self_ty),
340 SelfReceiver::Ref => quote!(_self: &#self_ty),
341 SelfReceiver::RefMut => quote!(_self: &mut #self_ty),
342 }
343 }
344 FnArg::Typed(pat_type) => quote!(#pat_type),
345 })
346 .collect();
347
348 // Build inner function call arguments
349 let inner_args: Vec<proc_macro2::TokenStream> = inputs
350 .iter()
351 .filter_map(|arg| match arg {
352 FnArg::Typed(pat_type) => {
353 if let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref() {
354 let ident = &pat_ident.ident;
355 Some(quote!(#ident))
356 } else {
357 None
358 }
359 }
360 FnArg::Receiver(_) => Some(quote!(self)), // Pass self to inner as _self
361 })
362 .collect();
363
364 let inner_fn_name = format_ident!("__simd_inner_{}", fn_name);
365
366 // Choose inline attribute based on args
367 // Note: #[inline(always)] + #[target_feature] requires nightly with
368 // #![feature(target_feature_inline_always)]
369 let inline_attr: Attribute = if args.inline_always {
370 parse_quote!(#[inline(always)])
371 } else {
372 parse_quote!(#[inline])
373 };
374
375 // Transform output and body to replace Self with concrete type if needed
376 let (inner_output, inner_body): (ReturnType, syn::Block) =
377 if let Some(ref self_ty) = args.self_type {
378 let mut replacer = ReplaceSelf {
379 replacement: self_ty,
380 };
381 let transformed_output = replacer.fold_return_type(output.clone());
382 let transformed_body = replacer.fold_block((**body).clone());
383 (transformed_output, transformed_body)
384 } else {
385 (output.clone(), (**body).clone())
386 };
387
388 // Generate the expanded function
389 // If we know the target arch (concrete token), generate cfg-gated real impl + stub
390 let expanded = if let Some(arch) = target_arch {
391 quote! {
392 // Real implementation for the correct architecture
393 #[cfg(target_arch = #arch)]
394 #(#attrs)*
395 #vis #sig {
396 #(#target_feature_attrs)*
397 #inline_attr
398 fn #inner_fn_name #generics (#(#inner_params),*) #inner_output #where_clause
399 #inner_body
400
401 // SAFETY: The token parameter proves the required CPU features are available.
402 // Calling a #[target_feature] function from a non-matching context requires
403 // unsafe because the CPU may not support those instructions. The token's
404 // existence proves summon() succeeded, so the features are available.
405 unsafe { #inner_fn_name(#(#inner_args),*) }
406 }
407
408 // Stub for other architectures - the token cannot be obtained, so this is unreachable
409 #[cfg(not(target_arch = #arch))]
410 #(#attrs)*
411 #vis #sig {
412 // This token type cannot be summoned on this architecture.
413 // If you're seeing this at runtime, there's a bug in your dispatch logic.
414 let _ = (#(#inner_args),*); // suppress unused warnings
415 unreachable!(
416 concat!(
417 "Called ",
418 stringify!(#fn_name),
419 " with a token that cannot exist on this architecture. ",
420 "This token requires target_arch = \"",
421 #arch,
422 "\"."
423 )
424 )
425 }
426 }
427 } else {
428 // No specific arch (trait bounds or generic) - generate without cfg guards
429 quote! {
430 #(#attrs)*
431 #vis #sig {
432 #(#target_feature_attrs)*
433 #inline_attr
434 fn #inner_fn_name #generics (#(#inner_params),*) #inner_output #where_clause
435 #inner_body
436
437 // SAFETY: Calling a #[target_feature] function from a non-matching context
438 // requires unsafe. The token proves the required CPU features are available.
439 unsafe { #inner_fn_name(#(#inner_args),*) }
440 }
441 }
442 };
443
444 expanded.into()
445}
446
447/// Mark a function as an arcane SIMD function.
448///
449/// This macro enables safe use of SIMD intrinsics by generating an inner function
450/// with the appropriate `#[target_feature(enable = "...")]` attributes based on
451/// the token parameter type. The outer function calls the inner function unsafely,
452/// which is justified because the token parameter proves the features are available.
453///
454/// **The token is passed through to the inner function**, so you can call other
455/// token-taking functions from inside `#[arcane]`.
456///
457/// # Token Parameter Forms
458///
459/// The macro supports four forms of token parameters:
460///
461/// ## Concrete Token Types
462///
463/// ```ignore
464/// #[arcane]
465/// fn process(token: Avx2Token, data: &[f32; 8]) -> [f32; 8] {
466/// // AVX2 intrinsics safe here
467/// }
468/// ```
469///
470/// ## impl Trait Bounds
471///
472/// ```ignore
473/// #[arcane]
474/// fn process(token: impl HasX64V2, data: &[f32; 8]) -> [f32; 8] {
475/// // Accepts any token with x86-64-v2 features (SSE4.2+)
476/// }
477/// ```
478///
479/// ## Generic Type Parameters
480///
481/// ```ignore
482/// #[arcane]
483/// fn process<T: HasX64V2>(token: T, data: &[f32; 8]) -> [f32; 8] {
484/// // Generic over any v2-capable token
485/// }
486///
487/// // Also works with where clauses:
488/// #[arcane]
489/// fn process<T>(token: T, data: &[f32; 8]) -> [f32; 8]
490/// where
491/// T: HasX64V2
492/// {
493/// // ...
494/// }
495/// ```
496///
497/// ## Methods with Self Receivers
498///
499/// Methods with `self`, `&self`, `&mut self` receivers are supported via the
500/// `_self = Type` argument. Use `_self` in the function body instead of `self`:
501///
502/// ```ignore
503/// use archmage::{X64V3Token, arcane};
504/// use wide::f32x8;
505///
506/// trait SimdOps {
507/// fn double(&self, token: X64V3Token) -> Self;
508/// fn square(self, token: X64V3Token) -> Self;
509/// fn scale(&mut self, token: X64V3Token, factor: f32);
510/// }
511///
512/// impl SimdOps for f32x8 {
513/// #[arcane(_self = f32x8)]
514/// fn double(&self, _token: X64V3Token) -> Self {
515/// // Use _self instead of self in the body
516/// *_self + *_self
517/// }
518///
519/// #[arcane(_self = f32x8)]
520/// fn square(self, _token: X64V3Token) -> Self {
521/// _self * _self
522/// }
523///
524/// #[arcane(_self = f32x8)]
525/// fn scale(&mut self, _token: X64V3Token, factor: f32) {
526/// *_self = *_self * f32x8::splat(factor);
527/// }
528/// }
529/// ```
530///
531/// **Why `_self`?** The macro generates an inner function where `self` becomes
532/// a regular parameter named `_self`. Using `_self` in your code reminds you
533/// that you're not using the normal `self` keyword.
534///
535/// **All receiver types are supported:**
536/// - `self` (by value/move) → `_self: Type`
537/// - `&self` (shared reference) → `_self: &Type`
538/// - `&mut self` (mutable reference) → `_self: &mut Type`
539///
540/// # Multiple Trait Bounds
541///
542/// When using `impl Trait` or generic bounds with multiple traits,
543/// all required features are enabled:
544///
545/// ```ignore
546/// #[arcane]
547/// fn fma_kernel(token: impl HasX64V2 + HasNeon, data: &[f32; 8]) -> [f32; 8] {
548/// // Cross-platform: SSE4.2 on x86, NEON on ARM
549/// }
550/// ```
551///
552/// # Expansion
553///
554/// The macro expands to approximately:
555///
556/// ```ignore
557/// fn process(token: Avx2Token, data: &[f32; 8]) -> [f32; 8] {
558/// #[target_feature(enable = "avx2")]
559/// #[inline]
560/// fn __simd_inner_process(token: Avx2Token, data: &[f32; 8]) -> [f32; 8] {
561/// let v = unsafe { _mm256_loadu_ps(data.as_ptr()) };
562/// let doubled = _mm256_add_ps(v, v);
563/// let mut out = [0.0f32; 8];
564/// unsafe { _mm256_storeu_ps(out.as_mut_ptr(), doubled) };
565/// out
566/// }
567/// // SAFETY: Calling #[target_feature] fn from non-matching context.
568/// // Token proves the required features are available.
569/// unsafe { __simd_inner_process(token, data) }
570/// }
571/// ```
572///
573/// # Profile Tokens
574///
575/// Profile tokens automatically enable all required features:
576///
577/// ```ignore
578/// #[arcane]
579/// fn kernel(token: X64V3Token, data: &mut [f32]) {
580/// // AVX2 + FMA + BMI1 + BMI2 intrinsics all safe here!
581/// }
582/// ```
583///
584/// # Supported Tokens
585///
586/// - **x86_64 tiers**: `X64V2Token`, `X64V3Token` / `Desktop64` / `Avx2FmaToken`,
587/// `X64V4Token` / `Avx512Token` / `Server64`, `Avx512ModernToken`, `Avx512Fp16Token`
588/// - **ARM**: `NeonToken` / `Arm64`, `NeonAesToken`, `NeonSha3Token`, `NeonCrcToken`
589/// - **WASM**: `Wasm128Token`
590///
591/// # Supported Trait Bounds
592///
593/// - **x86_64 tiers**: `HasX64V2`, `HasX64V4`
594/// - **ARM**: `HasNeon`, `HasNeonAes`, `HasNeonSha3`
595///
596/// **Preferred:** Use concrete tokens (`X64V3Token`, `Desktop64`, `NeonToken`) directly.
597/// Concrete token types also work as trait bounds (e.g., `impl X64V3Token`).
598///
599/// # Options
600///
601/// ## `inline_always`
602///
603/// Use `#[inline(always)]` instead of `#[inline]` for the inner function.
604/// This can improve performance by ensuring aggressive inlining, but requires
605/// nightly Rust with `#![feature(target_feature_inline_always)]` enabled in
606/// the crate using the macro.
607///
608/// ```ignore
609/// #![feature(target_feature_inline_always)]
610///
611/// #[arcane(inline_always)]
612/// fn fast_kernel(token: Avx2Token, data: &mut [f32]) {
613/// // Inner function will use #[inline(always)]
614/// }
615/// ```
#[proc_macro_attribute]
pub fn arcane(attr: TokenStream, item: TokenStream) -> TokenStream {
    // Attribute arguments (`inline_always`, `_self = Type`); a parse failure
    // is handed back to the compiler as a compile error.
    let args = parse_macro_input!(attr as ArcaneArgs);
    // The annotated item must parse as a function.
    let input_fn = parse_macro_input!(item as ItemFn);
    // The macro name is only used in diagnostics produced by `arcane_impl`.
    arcane_impl(input_fn, "arcane", args)
}
622
/// Legacy alias for [`arcane`].
///
/// **Deprecated:** Use `#[arcane]` instead. This alias exists only for migration.
///
/// Accepts the same arguments and expands identically; only the macro name
/// shown in diagnostics differs.
#[proc_macro_attribute]
#[doc(hidden)]
pub fn simd_fn(attr: TokenStream, item: TokenStream) -> TokenStream {
    let args = parse_macro_input!(attr as ArcaneArgs);
    let input_fn = parse_macro_input!(item as ItemFn);
    // Same implementation as `#[arcane]`, reported under the legacy name.
    arcane_impl(input_fn, "simd_fn", args)
}
633
634// ============================================================================
635// Rite macro for inner SIMD functions (no wrapper overhead)
636// ============================================================================
637
638/// Annotate inner SIMD helpers called from `#[arcane]` functions.
639///
640/// Unlike `#[arcane]`, which creates a wrapper function, `#[rite]` simply adds
641/// `#[target_feature]` and `#[inline]` attributes. This allows the function to
642/// inline directly into calling `#[arcane]` functions without optimization barriers.
643///
644/// # When to Use
645///
646/// Use `#[rite]` for helper functions that are **only** called from within
647/// `#[arcane]` functions with matching or superset token types:
648///
649/// ```ignore
650/// use archmage::{arcane, rite, X64V3Token};
651///
652/// #[arcane]
653/// fn outer(token: X64V3Token, data: &[f32; 8]) -> f32 {
654/// // helper inlines directly - no wrapper overhead
655/// helper(token, data) * 2.0
656/// }
657///
658/// #[rite]
659/// fn helper(token: X64V3Token, data: &[f32; 8]) -> f32 {
660/// // Just has #[target_feature(enable = "avx2,fma,...")]
661/// // Called from #[arcane] context, so features are guaranteed
662/// let v = f32x8::from_array(token, *data);
663/// v.reduce_add()
664/// }
665/// ```
666///
667/// # Safety
668///
669/// `#[rite]` functions can only be safely called from contexts where the
670/// required CPU features are enabled:
671/// - From within `#[arcane]` functions with matching/superset tokens
672/// - From within other `#[rite]` functions with matching/superset tokens
673/// - From code compiled with `-Ctarget-cpu` that enables the features
674///
675/// Calling from other contexts requires `unsafe` and the caller must ensure
676/// the CPU supports the required features.
677///
678/// # Comparison with #[arcane]
679///
680/// | Aspect | `#[arcane]` | `#[rite]` |
681/// |--------|-------------|-----------|
682/// | Creates wrapper | Yes | No |
683/// | Entry point | Yes | No |
684/// | Inlines into caller | No (barrier) | Yes |
685/// | Safe to call anywhere | Yes (with token) | Only from feature-enabled context |
#[proc_macro_attribute]
pub fn rite(attr: TokenStream, item: TokenStream) -> TokenStream {
    // Parse the attribute arguments. `RiteArgs` currently accepts none, so any
    // argument (including `inline_always`) is reported as a compile error.
    let args = parse_macro_input!(attr as RiteArgs);
    // The annotated item must parse as a function.
    let input_fn = parse_macro_input!(item as ItemFn);
    rite_impl(input_fn, args)
}
693
694/// Arguments for the `#[rite]` macro.
695///
696/// Currently empty - `#[inline(always)]` is not supported because
697/// `#[inline(always)]` + `#[target_feature]` requires nightly Rust.
698/// The regular `#[inline]` hint is sufficient when called from
699/// matching `#[target_feature]` contexts.
700#[derive(Default)]
701struct RiteArgs {
702 // No options currently - inline_always doesn't work on stable
703}
704
705impl Parse for RiteArgs {
706 fn parse(input: ParseStream) -> syn::Result<Self> {
707 if !input.is_empty() {
708 let ident: Ident = input.parse()?;
709 return Err(syn::Error::new(
710 ident.span(),
711 "#[rite] takes no arguments. Note: inline_always is not supported \
712 because #[inline(always)] + #[target_feature] requires nightly Rust.",
713 ));
714 }
715 Ok(RiteArgs::default())
716 }
717}
718
719/// Implementation for the `#[rite]` macro.
720fn rite_impl(mut input_fn: ItemFn, args: RiteArgs) -> TokenStream {
721 // Find the token parameter and its features
722 let (_, features, target_arch) = match find_token_param(&input_fn.sig) {
723 Some(result) => result,
724 None => {
725 let msg = "rite requires a token parameter. Supported forms:\n\
726 - Concrete: `token: X64V3Token`\n\
727 - impl Trait: `token: impl HasX64V2`\n\
728 - Generic: `fn foo<T: HasX64V2>(token: T, ...)`";
729 return syn::Error::new_spanned(&input_fn.sig, msg)
730 .to_compile_error()
731 .into();
732 }
733 };
734
735 // Build target_feature attributes
736 let target_feature_attrs: Vec<Attribute> = features
737 .iter()
738 .map(|feature| parse_quote!(#[target_feature(enable = #feature)]))
739 .collect();
740
741 // Always use #[inline] - #[inline(always)] + #[target_feature] requires nightly
742 let _ = args; // RiteArgs is currently empty but kept for future extensibility
743 let inline_attr: Attribute = parse_quote!(#[inline]);
744
745 // Prepend attributes to the function
746 let mut new_attrs = target_feature_attrs;
747 new_attrs.push(inline_attr);
748 new_attrs.append(&mut input_fn.attrs);
749 input_fn.attrs = new_attrs;
750
751 // If we know the target arch, generate cfg-gated impl + stub
752 if let Some(arch) = target_arch {
753 let vis = &input_fn.vis;
754 let sig = &input_fn.sig;
755 let attrs = &input_fn.attrs;
756 let block = &input_fn.block;
757
758 quote! {
759 #[cfg(target_arch = #arch)]
760 #(#attrs)*
761 #vis #sig
762 #block
763
764 #[cfg(not(target_arch = #arch))]
765 #vis #sig {
766 unreachable!(concat!(
767 "This function requires ",
768 #arch,
769 " architecture"
770 ))
771 }
772 }
773 .into()
774 } else {
775 // No specific arch (trait bounds) - just emit the annotated function
776 quote!(#input_fn).into()
777 }
778}
779
780// =============================================================================
781// magetypes! macro - generate platform variants from generic function
782// =============================================================================
783
/// Configuration for a magetypes variant
struct MagetypesVariant {
    /// Suffix appended to the function name (`<name>_<suffix>`).
    suffix: &'static str,
    /// Path of the concrete token type substituted for the `Token` placeholder.
    token_type: &'static str,
    /// `target_arch` the variant is gated on; `None` means no architecture gate.
    target_arch: Option<&'static str>,
    /// Cargo feature the variant is additionally gated on, if any.
    cargo_feature: Option<&'static str>,
}
791
/// Every variant `#[magetypes]` generates, in the order they are emitted.
const MAGETYPES_VARIANTS: &[MagetypesVariant] = &[
    // x86_64 V3 (AVX2)
    MagetypesVariant {
        suffix: "v3",
        token_type: "archmage::X64V3Token",
        target_arch: Some("x86_64"),
        cargo_feature: None,
    },
    // x86_64 V4 (AVX-512); additionally gated on the `avx512` cargo feature
    MagetypesVariant {
        suffix: "v4",
        token_type: "archmage::X64V4Token",
        target_arch: Some("x86_64"),
        cargo_feature: Some("avx512"),
    },
    // aarch64 NEON
    MagetypesVariant {
        suffix: "neon",
        token_type: "archmage::NeonToken",
        target_arch: Some("aarch64"),
        cargo_feature: None,
    },
    // wasm32 SIMD128
    MagetypesVariant {
        suffix: "wasm128",
        token_type: "archmage::Wasm128Token",
        target_arch: Some("wasm32"),
        cargo_feature: None,
    },
    // Scalar fallback
    MagetypesVariant {
        suffix: "scalar",
        token_type: "archmage::ScalarToken",
        target_arch: None, // Always available
        cargo_feature: None,
    },
];
829
830/// Generate platform-specific variants from a function using explicit types.
831///
832/// Write your function with explicit SIMD types (e.g., `f32x8`) and use `Token`
833/// as a placeholder for the token type. The macro generates platform-specific
/// variants (`_v3`, `_v4`, `_neon`, `_wasm128`, `_scalar`) with cfg guards.
835///
836/// # How It Works
837///
838/// - `Token` is replaced with the concrete token type for each variant
839/// - Each variant is wrapped in the appropriate `#[cfg(target_arch = ...)]`
840/// - Use `use magetypes::simd::*;` to get types that work on all platforms
841/// (native on x86, polyfilled on ARM/WASM)
842///
843/// # Example
844///
845/// ```rust,ignore
846/// use archmage::magetypes;
847/// use magetypes::simd::*; // f32x8 works everywhere via polyfill
848///
849/// #[magetypes]
850/// pub fn dot(token: Token, a: &[f32; 8], b: &[f32; 8]) -> f32 {
851/// let va = f32x8::load(token, a);
852/// let vb = f32x8::load(token, b);
853/// (va * vb).reduce_add()
854/// }
855///
856/// // Generates:
857/// // - dot_v3(token: X64V3Token, ...) - x86_64 only
858/// // - dot_v4(token: X64V4Token, ...) - x86_64 + avx512 feature
859/// // - dot_neon(token: NeonToken, ...) - aarch64 only
860/// // - dot_wasm128(token: Wasm128Token, ...) - wasm32 only
861/// // - dot_scalar(token: ScalarToken, ...) - always available
862/// ```
863///
864/// # Usage with incant!
865///
866/// The generated variants work with `incant!` for dispatch:
867///
868/// ```rust,ignore
869/// pub fn dot_api(a: &[f32; 8], b: &[f32; 8]) -> f32 {
870/// incant!(dot(a, b))
871/// }
872/// ```
#[proc_macro_attribute]
pub fn magetypes(attr: TokenStream, item: TokenStream) -> TokenStream {
    // Ignore attributes for now (could add variant selection later).
    // Note that this means any argument is accepted silently.
    let _ = attr;
    // The annotated item must parse as a function.
    let input_fn = parse_macro_input!(item as ItemFn);
    magetypes_impl(input_fn)
}
880
881fn magetypes_impl(input_fn: ItemFn) -> TokenStream {
882 let fn_name = &input_fn.sig.ident;
883 let fn_attrs = &input_fn.attrs;
884
885 // Convert function to string for text substitution
886 let fn_str = input_fn.to_token_stream().to_string();
887
888 let mut variants = Vec::new();
889
890 for variant in MAGETYPES_VARIANTS {
891 // Create suffixed function name
892 let suffixed_name = format!("{}_{}", fn_name, variant.suffix);
893
894 // Do text substitution
895 let mut variant_str = fn_str.clone();
896
897 // Replace function name
898 variant_str = variant_str.replacen(&fn_name.to_string(), &suffixed_name, 1);
899
900 // Replace Token type with concrete token
901 variant_str = variant_str.replace("Token", variant.token_type);
902
903 // Parse back to tokens
904 let variant_tokens: proc_macro2::TokenStream = match variant_str.parse() {
905 Ok(t) => t,
906 Err(e) => {
907 return syn::Error::new_spanned(
908 &input_fn,
909 format!(
910 "Failed to parse generated variant `{}`: {}",
911 suffixed_name, e
912 ),
913 )
914 .to_compile_error()
915 .into();
916 }
917 };
918
919 // Add cfg guards
920 let cfg_guard = match (variant.target_arch, variant.cargo_feature) {
921 (Some(arch), Some(feature)) => {
922 quote! { #[cfg(all(target_arch = #arch, feature = #feature))] }
923 }
924 (Some(arch), None) => {
925 quote! { #[cfg(target_arch = #arch)] }
926 }
927 (None, Some(feature)) => {
928 quote! { #[cfg(feature = #feature)] }
929 }
930 (None, None) => {
931 quote! {} // No guard needed (scalar)
932 }
933 };
934
935 variants.push(quote! {
936 #cfg_guard
937 #variant_tokens
938 });
939 }
940
941 // Remove attributes from the list that should not be duplicated
942 let filtered_attrs: Vec<_> = fn_attrs
943 .iter()
944 .filter(|a| !a.path().is_ident("magetypes"))
945 .collect();
946
947 let output = quote! {
948 #(#filtered_attrs)*
949 #(#variants)*
950 };
951
952 output.into()
953}
954
955// =============================================================================
956// incant! macro - dispatch to platform-specific variants
957// =============================================================================
958
959/// Input for the incant! macro
960struct IncantInput {
961 /// Function name to call
962 func_name: Ident,
963 /// Arguments to pass
964 args: Vec<syn::Expr>,
965 /// Optional token variable for passthrough mode
966 with_token: Option<syn::Expr>,
967}
968
969impl Parse for IncantInput {
970 fn parse(input: ParseStream) -> syn::Result<Self> {
971 // Parse: function_name(arg1, arg2, ...) [with token_expr]
972 let func_name: Ident = input.parse()?;
973
974 // Parse parenthesized arguments
975 let content;
976 syn::parenthesized!(content in input);
977 let args = content
978 .parse_terminated(syn::Expr::parse, Token![,])?
979 .into_iter()
980 .collect();
981
982 // Check for optional "with token"
983 let with_token = if input.peek(Ident) {
984 let kw: Ident = input.parse()?;
985 if kw != "with" {
986 return Err(syn::Error::new_spanned(kw, "expected `with` keyword"));
987 }
988 Some(input.parse()?)
989 } else {
990 None
991 };
992
993 Ok(IncantInput {
994 func_name,
995 args,
996 with_token,
997 })
998 }
999}
1000
1001/// Dispatch to platform-specific SIMD variants.
1002///
1003/// # Entry Point Mode (no token yet)
1004///
1005/// Summons tokens and dispatches to the best available variant:
1006///
1007/// ```rust,ignore
1008/// pub fn public_api(data: &[f32]) -> f32 {
1009/// incant!(dot(data))
1010/// }
1011/// ```
1012///
1013/// Expands to runtime feature detection + dispatch to `dot_v3`, `dot_v4`,
1014/// `dot_neon`, `dot_wasm128`, or `dot_scalar`.
1015///
1016/// # Passthrough Mode (already have token)
1017///
1018/// Uses compile-time dispatch via `IntoConcreteToken`:
1019///
1020/// ```rust,ignore
1021/// #[arcane]
1022/// fn outer(token: X64V3Token, data: &[f32]) -> f32 {
1023/// incant!(inner(data) with token)
1024/// }
1025/// ```
1026///
1027/// The compiler monomorphizes the dispatch, eliminating non-matching branches.
1028///
1029/// # Variant Naming
1030///
1031/// Functions must have suffixed variants:
1032/// - `_v3` for `X64V3Token`
1033/// - `_v4` for `X64V4Token` (requires `avx512` feature)
1034/// - `_neon` for `NeonToken`
1035/// - `_wasm128` for `Wasm128Token`
1036/// - `_scalar` for `ScalarToken`
1037#[proc_macro]
1038pub fn incant(input: TokenStream) -> TokenStream {
1039 let input = parse_macro_input!(input as IncantInput);
1040 incant_impl(input)
1041}
1042
1043/// Legacy alias for [`incant!`].
1044#[proc_macro]
1045pub fn simd_route(input: TokenStream) -> TokenStream {
1046 let input = parse_macro_input!(input as IncantInput);
1047 incant_impl(input)
1048}
1049
1050fn incant_impl(input: IncantInput) -> TokenStream {
1051 let func_name = &input.func_name;
1052 let args = &input.args;
1053
1054 // Create suffixed function names
1055 let fn_v3 = format_ident!("{}_v3", func_name);
1056 let fn_v4 = format_ident!("{}_v4", func_name);
1057 let fn_neon = format_ident!("{}_neon", func_name);
1058 let fn_wasm128 = format_ident!("{}_wasm128", func_name);
1059 let fn_scalar = format_ident!("{}_scalar", func_name);
1060
1061 // Use labeled blocks instead of `return` so incant! can be chained.
1062 // Labeled blocks are stable since Rust 1.65.
1063 if let Some(token_expr) = &input.with_token {
1064 // Passthrough mode: use IntoConcreteToken for compile-time dispatch
1065 let expanded = quote! {
1066 '__incant: {
1067 use archmage::IntoConcreteToken;
1068 let __incant_token = #token_expr;
1069
1070 #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
1071 {
1072 #[cfg(feature = "avx512")]
1073 if let Some(__t) = __incant_token.as_x64v4() {
1074 break '__incant #fn_v4(__t, #(#args),*);
1075 }
1076 if let Some(__t) = __incant_token.as_x64v3() {
1077 break '__incant #fn_v3(__t, #(#args),*);
1078 }
1079 }
1080
1081 #[cfg(target_arch = "aarch64")]
1082 if let Some(__t) = __incant_token.as_neon() {
1083 break '__incant #fn_neon(__t, #(#args),*);
1084 }
1085
1086 #[cfg(target_arch = "wasm32")]
1087 if let Some(__t) = __incant_token.as_wasm128() {
1088 break '__incant #fn_wasm128(__t, #(#args),*);
1089 }
1090
1091 if let Some(__t) = __incant_token.as_scalar() {
1092 break '__incant #fn_scalar(__t, #(#args),*);
1093 }
1094
1095 unreachable!("Token did not match any known variant")
1096 }
1097 };
1098 expanded.into()
1099 } else {
1100 // Entry point mode: summon tokens and dispatch
1101 let expanded = quote! {
1102 '__incant: {
1103 use archmage::SimdToken;
1104
1105 #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
1106 {
1107 #[cfg(feature = "avx512")]
1108 if let Some(__t) = archmage::X64V4Token::summon() {
1109 break '__incant #fn_v4(__t, #(#args),*);
1110 }
1111 if let Some(__t) = archmage::X64V3Token::summon() {
1112 break '__incant #fn_v3(__t, #(#args),*);
1113 }
1114 }
1115
1116 #[cfg(target_arch = "aarch64")]
1117 if let Some(__t) = archmage::NeonToken::summon() {
1118 break '__incant #fn_neon(__t, #(#args),*);
1119 }
1120
1121 #[cfg(target_arch = "wasm32")]
1122 if let Some(__t) = archmage::Wasm128Token::summon() {
1123 break '__incant #fn_wasm128(__t, #(#args),*);
1124 }
1125
1126 // Scalar fallback
1127 #fn_scalar(archmage::ScalarToken, #(#args),*)
1128 }
1129 };
1130 expanded.into()
1131 }
1132}
1133
1134// =============================================================================
1135// Unit tests for token/trait recognition maps
1136// =============================================================================
1137
#[cfg(test)]
mod tests {
    use super::generated::{ALL_CONCRETE_TOKENS, ALL_TRAIT_NAMES};
    use super::*;

    /// Every name in `generated::ALL_CONCRETE_TOKENS` must be known to
    /// `token_to_features`.
    #[test]
    fn every_concrete_token_is_in_token_to_features() {
        for token in ALL_CONCRETE_TOKENS.iter().copied() {
            let recognized = token_to_features(token).is_some();
            assert!(
                recognized,
                "Token `{}` exists in runtime crate but is NOT recognized by \
                 token_to_features() in the proc macro. Add it!",
                token
            );
        }
    }

    /// Every name in `generated::ALL_TRAIT_NAMES` must be known to
    /// `trait_to_features`.
    #[test]
    fn every_trait_is_in_trait_to_features() {
        for trait_name in ALL_TRAIT_NAMES.iter().copied() {
            let recognized = trait_to_features(trait_name).is_some();
            assert!(
                recognized,
                "Trait `{}` exists in runtime crate but is NOT recognized by \
                 trait_to_features() in the proc macro. Add it!",
                trait_name
            );
        }
    }

    /// Alias names must resolve to exactly the same feature set as the token
    /// they stand for.
    #[test]
    fn token_aliases_map_to_same_features() {
        // (alias, canonical, failure message), checked in order:
        //   Desktop64 = X64V3Token
        //   Server64 = X64V4Token = Avx512Token
        //   Arm64 = NeonToken
        let alias_pairs = [
            (
                "Desktop64",
                "X64V3Token",
                "Desktop64 and X64V3Token should map to identical features",
            ),
            (
                "Server64",
                "X64V4Token",
                "Server64 and X64V4Token should map to identical features",
            ),
            (
                "X64V4Token",
                "Avx512Token",
                "X64V4Token and Avx512Token should map to identical features",
            ),
            (
                "Arm64",
                "NeonToken",
                "Arm64 and NeonToken should map to identical features",
            ),
        ];

        for &(alias, canonical, message) in &alias_pairs {
            assert_eq!(
                token_to_features(alias),
                token_to_features(canonical),
                "{}",
                message
            );
        }
    }

    /// Tier tokens must also be accepted where a trait bound is expected.
    #[test]
    fn trait_to_features_includes_tokens_as_bounds() {
        // The macro inspects the AST before type checking, so a token name
        // written in bound position (e.g. `impl X64V3Token`) still reaches
        // trait_to_features() even though rustc would reject it later.
        let tier_tokens = [
            "X64V2Token",
            "X64V3Token",
            "Desktop64",
            "Avx2FmaToken",
            "X64V4Token",
            "Avx512Token",
            "Server64",
            "Avx512ModernToken",
            "Avx512Fp16Token",
            "NeonToken",
            "Arm64",
            "NeonAesToken",
            "NeonSha3Token",
            "NeonCrcToken",
        ];

        for token in tier_tokens.iter().copied() {
            let recognized = trait_to_features(token).is_some();
            assert!(
                recognized,
                "Tier token `{}` should also be recognized in trait_to_features() \
                 for use as a generic bound. Add it!",
                token
            );
        }
    }

    /// `HasX64V4` must be a strict superset of `HasX64V2`.
    #[test]
    fn trait_features_are_cumulative() {
        let base = trait_to_features("HasX64V2").unwrap();
        let extended = trait_to_features("HasX64V4").unwrap();

        // Every v2 feature must reappear in the v4 list.
        for feature in base.iter() {
            assert!(
                extended.contains(feature),
                "HasX64V4 should include v2 feature `{}` but doesn't",
                feature
            );
        }

        // ...and v4 must add something on top.
        assert!(
            extended.len() > base.len(),
            "HasX64V4 should have more features than HasX64V2"
        );
    }

    /// `X64V3Token`, used as a bound, must carry every `HasX64V2` feature.
    #[test]
    fn x64v3_trait_features_include_v2() {
        let v2_set = trait_to_features("HasX64V2").unwrap();
        let v3_set = trait_to_features("X64V3Token").unwrap();

        v2_set.iter().for_each(|feature| {
            assert!(
                v3_set.contains(feature),
                "X64V3Token trait features should include v2 feature `{}` but don't",
                feature
            );
        });
    }

    /// `HasNeonAes` must carry every `HasNeon` feature.
    #[test]
    fn has_neon_aes_includes_neon() {
        let plain = trait_to_features("HasNeon").unwrap();
        let with_aes = trait_to_features("HasNeonAes").unwrap();

        plain.iter().for_each(|feature| {
            assert!(
                with_aes.contains(feature),
                "HasNeonAes should include NEON feature `{}`",
                feature
            );
        });
    }

    /// Traits dropped in 0.3.0 must stay unrecognized.
    #[test]
    fn no_removed_traits_are_recognized() {
        let removed = [
            "HasSse",
            "HasSse2",
            "HasSse41",
            "HasSse42",
            "HasAvx",
            "HasAvx2",
            "HasFma",
            "HasAvx512f",
            "HasAvx512bw",
            "HasAvx512vl",
            "HasAvx512vbmi2",
            "HasSve",
            "HasSve2",
        ];

        for trait_name in removed.iter().copied() {
            let still_known = trait_to_features(trait_name).is_some();
            assert!(
                !still_known,
                "Removed trait `{}` should NOT be in trait_to_features(). \
                 It was removed in 0.3.0 — users should migrate to tier traits.",
                trait_name
            );
        }
    }

    /// Token names that do not exist must stay unrecognized.
    #[test]
    fn no_nonexistent_tokens_are_recognized() {
        let fake = [
            "Sse2Token",
            "SveToken",
            "Sve2Token",
            "Avx512VnniToken",
            "X64V4ModernToken",
            "NeonFp16Token",
        ];

        for token in fake.iter().copied() {
            let known = token_to_features(token).is_some();
            assert!(
                !known,
                "Non-existent token `{}` should NOT be in token_to_features()",
                token
            );
        }
    }
}