archmage_macros/lib.rs
1//! Proc-macros for archmage SIMD capability tokens.
2//!
//! Provides the `#[arcane]` attribute (with the legacy `#[simd_fn]` alias) to make raw intrinsics
//! safe via token proof.
5
6use proc_macro::TokenStream;
7use quote::{format_ident, quote, ToTokens};
8use syn::{
9 fold::Fold,
10 parse::{Parse, ParseStream},
11 parse_macro_input, parse_quote, Attribute, FnArg, GenericParam, Ident, ItemFn, PatType,
12 ReturnType, Signature, Token, Type, TypeParamBound,
13};
14
15/// A Fold implementation that replaces `Self` with a concrete type.
16struct ReplaceSelf<'a> {
17 replacement: &'a Type,
18}
19
20impl Fold for ReplaceSelf<'_> {
21 fn fold_type(&mut self, ty: Type) -> Type {
22 match ty {
23 Type::Path(ref type_path) if type_path.qself.is_none() => {
24 // Check if it's just `Self`
25 if type_path.path.is_ident("Self") {
26 return self.replacement.clone();
27 }
28 // Otherwise continue folding
29 syn::fold::fold_type(self, ty)
30 }
31 _ => syn::fold::fold_type(self, ty),
32 }
33 }
34}
35
36/// Arguments to the `#[arcane]` macro.
37#[derive(Default)]
38struct ArcaneArgs {
39 /// Use `#[inline(always)]` instead of `#[inline]` for the inner function.
40 /// Requires nightly Rust with `#![feature(target_feature_inline_always)]`.
41 inline_always: bool,
42 /// The concrete type to use for `self` receiver.
43 /// When specified, `self`/`&self`/`&mut self` is transformed to `_self: Type`/`&Type`/`&mut Type`.
44 self_type: Option<Type>,
45}
46
47impl Parse for ArcaneArgs {
48 fn parse(input: ParseStream) -> syn::Result<Self> {
49 let mut args = ArcaneArgs::default();
50
51 while !input.is_empty() {
52 let ident: Ident = input.parse()?;
53 match ident.to_string().as_str() {
54 "inline_always" => args.inline_always = true,
55 "_self" => {
56 let _: Token![=] = input.parse()?;
57 args.self_type = Some(input.parse()?);
58 }
59 other => {
60 return Err(syn::Error::new(
61 ident.span(),
62 format!("unknown arcane argument: `{}`", other),
63 ))
64 }
65 }
66 // Consume optional comma
67 if input.peek(Token![,]) {
68 let _: Token![,] = input.parse()?;
69 }
70 }
71
72 Ok(args)
73 }
74}
75
76// Token-to-features and trait-to-features mappings are generated from
77// token-registry.toml by xtask. Regenerate with: cargo run -p xtask -- generate
78mod generated;
79use generated::{token_to_arch, token_to_features, trait_to_features};
80
/// How a parameter's type may identify a capability token.
enum TokenTypeInfo {
    /// A known concrete token type, identified by its last path segment
    /// (e.g., `Avx2Token`).
    Concrete(String),
    /// An `impl Trait` type; holds the trait names from its bounds
    /// (e.g., `impl HasX64V2`).
    ImplTrait(Vec<String>),
    /// Any other path type, treated as a possible generic parameter name
    /// (e.g., `T`) whose bounds must be looked up in the signature.
    Generic(String),
}
90
91/// Extract token type information from a type.
92fn extract_token_type_info(ty: &Type) -> Option<TokenTypeInfo> {
93 match ty {
94 Type::Path(type_path) => {
95 // Get the last segment of the path (e.g., "Avx2Token" from "archmage::Avx2Token")
96 type_path.path.segments.last().map(|seg| {
97 let name = seg.ident.to_string();
98 // Check if it's a known concrete token type
99 if token_to_features(&name).is_some() {
100 TokenTypeInfo::Concrete(name)
101 } else {
102 // Might be a generic type parameter like `T`
103 TokenTypeInfo::Generic(name)
104 }
105 })
106 }
107 Type::Reference(type_ref) => {
108 // Handle &Token or &mut Token
109 extract_token_type_info(&type_ref.elem)
110 }
111 Type::ImplTrait(impl_trait) => {
112 // Handle `impl HasX64V2` or `impl HasX64V2 + HasNeon`
113 let traits: Vec<String> = extract_trait_names_from_bounds(&impl_trait.bounds);
114 if traits.is_empty() {
115 None
116 } else {
117 Some(TokenTypeInfo::ImplTrait(traits))
118 }
119 }
120 _ => None,
121 }
122}
123
124/// Extract trait names from type param bounds.
125fn extract_trait_names_from_bounds(
126 bounds: &syn::punctuated::Punctuated<TypeParamBound, Token![+]>,
127) -> Vec<String> {
128 bounds
129 .iter()
130 .filter_map(|bound| {
131 if let TypeParamBound::Trait(trait_bound) = bound {
132 trait_bound
133 .path
134 .segments
135 .last()
136 .map(|seg| seg.ident.to_string())
137 } else {
138 None
139 }
140 })
141 .collect()
142}
143
144/// Look up a generic type parameter in the function's generics.
145fn find_generic_bounds(sig: &Signature, type_name: &str) -> Option<Vec<String>> {
146 // Check inline bounds first (e.g., `fn foo<T: HasX64V2>(token: T)`)
147 for param in &sig.generics.params {
148 if let GenericParam::Type(type_param) = param {
149 if type_param.ident == type_name {
150 let traits = extract_trait_names_from_bounds(&type_param.bounds);
151 if !traits.is_empty() {
152 return Some(traits);
153 }
154 }
155 }
156 }
157
158 // Check where clause (e.g., `fn foo<T>(token: T) where T: HasX64V2`)
159 if let Some(where_clause) = &sig.generics.where_clause {
160 for predicate in &where_clause.predicates {
161 if let syn::WherePredicate::Type(pred_type) = predicate {
162 if let Type::Path(type_path) = &pred_type.bounded_ty {
163 if let Some(seg) = type_path.path.segments.last() {
164 if seg.ident == type_name {
165 let traits = extract_trait_names_from_bounds(&pred_type.bounds);
166 if !traits.is_empty() {
167 return Some(traits);
168 }
169 }
170 }
171 }
172 }
173 }
174 }
175
176 None
177}
178
179/// Convert trait names to features, collecting all features from all traits.
180fn traits_to_features(trait_names: &[String]) -> Option<Vec<&'static str>> {
181 let mut all_features = Vec::new();
182
183 for trait_name in trait_names {
184 if let Some(features) = trait_to_features(trait_name) {
185 for &feature in features {
186 if !all_features.contains(&feature) {
187 all_features.push(feature);
188 }
189 }
190 }
191 }
192
193 if all_features.is_empty() {
194 None
195 } else {
196 Some(all_features)
197 }
198}
199
200/// Result of finding a token parameter in a function signature.
201struct TokenParamInfo {
202 /// The parameter identifier (e.g., `token`)
203 ident: Ident,
204 /// Target features to enable (e.g., `["avx2", "fma"]`)
205 features: Vec<&'static str>,
206 /// Target architecture (Some for concrete tokens, None for traits/generics)
207 target_arch: Option<&'static str>,
208 /// Concrete token type name (Some for concrete tokens, None for traits/generics)
209 token_type_name: Option<String>,
210}
211
212/// Find the first token parameter in a function signature.
213fn find_token_param(sig: &Signature) -> Option<TokenParamInfo> {
214 for arg in &sig.inputs {
215 match arg {
216 FnArg::Receiver(_) => {
217 // Self receivers (self, &self, &mut self) are not yet supported.
218 // The macro creates an inner function, and Rust's inner functions
219 // cannot have `self` parameters. Supporting this would require
220 // AST rewriting to replace `self` with a regular parameter.
221 // See the module docs for the workaround.
222 continue;
223 }
224 FnArg::Typed(PatType { pat, ty, .. }) => {
225 if let Some(info) = extract_token_type_info(ty) {
226 let (features, arch, token_name) = match info {
227 TokenTypeInfo::Concrete(ref name) => {
228 let features = token_to_features(name).map(|f| f.to_vec());
229 let arch = token_to_arch(name);
230 (features, arch, Some(name.clone()))
231 }
232 TokenTypeInfo::ImplTrait(trait_names) => {
233 (traits_to_features(&trait_names), None, None)
234 }
235 TokenTypeInfo::Generic(type_name) => {
236 // Look up the generic parameter's bounds
237 let features = find_generic_bounds(sig, &type_name)
238 .and_then(|traits| traits_to_features(&traits));
239 (features, None, None)
240 }
241 };
242
243 if let Some(features) = features {
244 // Extract parameter name
245 if let syn::Pat::Ident(pat_ident) = pat.as_ref() {
246 return Some(TokenParamInfo {
247 ident: pat_ident.ident.clone(),
248 features,
249 target_arch: arch,
250 token_type_name: token_name,
251 });
252 }
253 }
254 }
255 }
256 }
257 }
258 None
259}
260
/// The form of `self` receiver a method was declared with.
enum SelfReceiver {
    /// `self`: taken by value.
    Owned,
    /// `&self`: shared borrow.
    Ref,
    /// `&mut self`: exclusive borrow.
    RefMut,
}
270
271/// Shared implementation for arcane/arcane macros.
272fn arcane_impl(input_fn: ItemFn, macro_name: &str, args: ArcaneArgs) -> TokenStream {
273 // Check for self receiver
274 let has_self_receiver = input_fn
275 .sig
276 .inputs
277 .first()
278 .map(|arg| matches!(arg, FnArg::Receiver(_)))
279 .unwrap_or(false);
280
281 // If there's a self receiver, we need _self = Type
282 if has_self_receiver && args.self_type.is_none() {
283 let msg = format!(
284 "{} with self receiver requires `_self = Type` argument.\n\
285 Example: #[{}(_self = MyType)]\n\
286 Use `_self` (not `self`) in the function body to refer to self.",
287 macro_name, macro_name
288 );
289 return syn::Error::new_spanned(&input_fn.sig, msg)
290 .to_compile_error()
291 .into();
292 }
293
294 // Find the token parameter, its features, target arch, and token type name
295 let TokenParamInfo {
296 ident: _token_ident,
297 features,
298 target_arch,
299 token_type_name,
300 } = match find_token_param(&input_fn.sig) {
301 Some(result) => result,
302 None => {
303 let msg = format!(
304 "{} requires a token parameter. Supported forms:\n\
305 - Concrete: `token: X64V3Token`\n\
306 - impl Trait: `token: impl Has256BitSimd`\n\
307 - Generic: `fn foo<T: HasX64V2>(token: T, ...)`\n\
308 - With self: `#[{}(_self = Type)] fn method(&self, token: impl HasNeon, ...)`",
309 macro_name, macro_name
310 );
311 return syn::Error::new_spanned(&input_fn.sig, msg)
312 .to_compile_error()
313 .into();
314 }
315 };
316
317 // Build target_feature attributes
318 let target_feature_attrs: Vec<Attribute> = features
319 .iter()
320 .map(|feature| parse_quote!(#[target_feature(enable = #feature)]))
321 .collect();
322
323 // Extract function components
324 let vis = &input_fn.vis;
325 let sig = &input_fn.sig;
326 let fn_name = &sig.ident;
327 let generics = &sig.generics;
328 let where_clause = &generics.where_clause;
329 let inputs = &sig.inputs;
330 let output = &sig.output;
331 let body = &input_fn.block;
332 let attrs = &input_fn.attrs;
333
334 // Determine self receiver type if present
335 let self_receiver_kind: Option<SelfReceiver> = inputs.first().and_then(|arg| match arg {
336 FnArg::Receiver(receiver) => {
337 if receiver.reference.is_none() {
338 Some(SelfReceiver::Owned)
339 } else if receiver.mutability.is_some() {
340 Some(SelfReceiver::RefMut)
341 } else {
342 Some(SelfReceiver::Ref)
343 }
344 }
345 _ => None,
346 });
347
348 // Build inner function parameters, transforming self if needed
349 let inner_params: Vec<proc_macro2::TokenStream> = inputs
350 .iter()
351 .map(|arg| match arg {
352 FnArg::Receiver(_) => {
353 // Transform self receiver to _self parameter
354 let self_ty = args.self_type.as_ref().unwrap();
355 match self_receiver_kind.as_ref().unwrap() {
356 SelfReceiver::Owned => quote!(_self: #self_ty),
357 SelfReceiver::Ref => quote!(_self: &#self_ty),
358 SelfReceiver::RefMut => quote!(_self: &mut #self_ty),
359 }
360 }
361 FnArg::Typed(pat_type) => quote!(#pat_type),
362 })
363 .collect();
364
365 // Build inner function call arguments
366 let inner_args: Vec<proc_macro2::TokenStream> = inputs
367 .iter()
368 .filter_map(|arg| match arg {
369 FnArg::Typed(pat_type) => {
370 if let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref() {
371 let ident = &pat_ident.ident;
372 Some(quote!(#ident))
373 } else {
374 None
375 }
376 }
377 FnArg::Receiver(_) => Some(quote!(self)), // Pass self to inner as _self
378 })
379 .collect();
380
381 let inner_fn_name = format_ident!("__simd_inner_{}", fn_name);
382
383 // Choose inline attribute based on args
384 // Note: #[inline(always)] + #[target_feature] requires nightly with
385 // #![feature(target_feature_inline_always)]
386 let inline_attr: Attribute = if args.inline_always {
387 parse_quote!(#[inline(always)])
388 } else {
389 parse_quote!(#[inline])
390 };
391
392 // Transform output and body to replace Self with concrete type if needed
393 let (inner_output, inner_body): (ReturnType, syn::Block) =
394 if let Some(ref self_ty) = args.self_type {
395 let mut replacer = ReplaceSelf {
396 replacement: self_ty,
397 };
398 let transformed_output = replacer.fold_return_type(output.clone());
399 let transformed_body = replacer.fold_block((**body).clone());
400 (transformed_output, transformed_body)
401 } else {
402 (output.clone(), (**body).clone())
403 };
404
405 // Generate the expanded function
406 // If we know the target arch (concrete token), generate cfg-gated real impl + stub
407 let token_type_str = token_type_name.as_deref().unwrap_or("UnknownToken");
408 let expanded = if let Some(arch) = target_arch {
409 quote! {
410 // Real implementation for the correct architecture
411 #[cfg(target_arch = #arch)]
412 #(#attrs)*
413 #vis #sig {
414 #(#target_feature_attrs)*
415 #inline_attr
416 fn #inner_fn_name #generics (#(#inner_params),*) #inner_output #where_clause
417 #inner_body
418
419 // SAFETY: The token parameter proves the required CPU features are available.
420 // Calling a #[target_feature] function from a non-matching context requires
421 // unsafe because the CPU may not support those instructions. The token's
422 // existence proves summon() succeeded, so the features are available.
423 unsafe { #inner_fn_name(#(#inner_args),*) }
424 }
425
426 // Stub for other architectures - the token cannot be obtained, so this is unreachable
427 #[cfg(not(target_arch = #arch))]
428 #(#attrs)*
429 #vis #sig {
430 // This token type cannot be summoned on this architecture.
431 // If you're seeing this at runtime, there's a bug in dispatch logic
432 // or forge_token_dangerously() was used incorrectly.
433 let _ = (#(#inner_args),*); // suppress unused warnings
434 unreachable!(
435 "BUG: {}() was called but requires {} (target_arch = \"{}\"). \
436 {}::summon() returns None on this architecture, so this function \
437 is unreachable in safe code. If you used forge_token_dangerously(), \
438 that is the bug.",
439 stringify!(#fn_name),
440 #token_type_str,
441 #arch,
442 #token_type_str,
443 )
444 }
445 }
446 } else {
447 // No specific arch (trait bounds or generic) - generate without cfg guards
448 quote! {
449 #(#attrs)*
450 #vis #sig {
451 #(#target_feature_attrs)*
452 #inline_attr
453 fn #inner_fn_name #generics (#(#inner_params),*) #inner_output #where_clause
454 #inner_body
455
456 // SAFETY: Calling a #[target_feature] function from a non-matching context
457 // requires unsafe. The token proves the required CPU features are available.
458 unsafe { #inner_fn_name(#(#inner_args),*) }
459 }
460 }
461 };
462
463 expanded.into()
464}
465
466/// Mark a function as an arcane SIMD function.
467///
468/// This macro enables safe use of SIMD intrinsics by generating an inner function
469/// with the appropriate `#[target_feature(enable = "...")]` attributes based on
470/// the token parameter type. The outer function calls the inner function unsafely,
471/// which is justified because the token parameter proves the features are available.
472///
473/// **The token is passed through to the inner function**, so you can call other
474/// token-taking functions from inside `#[arcane]`.
475///
476/// # Token Parameter Forms
477///
478/// The macro supports four forms of token parameters:
479///
480/// ## Concrete Token Types
481///
482/// ```ignore
483/// #[arcane]
484/// fn process(token: Avx2Token, data: &[f32; 8]) -> [f32; 8] {
485/// // AVX2 intrinsics safe here
486/// }
487/// ```
488///
489/// ## impl Trait Bounds
490///
491/// ```ignore
492/// #[arcane]
493/// fn process(token: impl HasX64V2, data: &[f32; 8]) -> [f32; 8] {
494/// // Accepts any token with x86-64-v2 features (SSE4.2+)
495/// }
496/// ```
497///
498/// ## Generic Type Parameters
499///
500/// ```ignore
501/// #[arcane]
502/// fn process<T: HasX64V2>(token: T, data: &[f32; 8]) -> [f32; 8] {
503/// // Generic over any v2-capable token
504/// }
505///
506/// // Also works with where clauses:
507/// #[arcane]
508/// fn process<T>(token: T, data: &[f32; 8]) -> [f32; 8]
509/// where
510/// T: HasX64V2
511/// {
512/// // ...
513/// }
514/// ```
515///
516/// ## Methods with Self Receivers
517///
518/// Methods with `self`, `&self`, `&mut self` receivers are supported via the
519/// `_self = Type` argument. Use `_self` in the function body instead of `self`:
520///
521/// ```ignore
522/// use archmage::{X64V3Token, arcane};
523/// use wide::f32x8;
524///
525/// trait SimdOps {
526/// fn double(&self, token: X64V3Token) -> Self;
527/// fn square(self, token: X64V3Token) -> Self;
528/// fn scale(&mut self, token: X64V3Token, factor: f32);
529/// }
530///
531/// impl SimdOps for f32x8 {
532/// #[arcane(_self = f32x8)]
533/// fn double(&self, _token: X64V3Token) -> Self {
534/// // Use _self instead of self in the body
535/// *_self + *_self
536/// }
537///
538/// #[arcane(_self = f32x8)]
539/// fn square(self, _token: X64V3Token) -> Self {
540/// _self * _self
541/// }
542///
543/// #[arcane(_self = f32x8)]
544/// fn scale(&mut self, _token: X64V3Token, factor: f32) {
545/// *_self = *_self * f32x8::splat(factor);
546/// }
547/// }
548/// ```
549///
550/// **Why `_self`?** The macro generates an inner function where `self` becomes
551/// a regular parameter named `_self`. Using `_self` in your code reminds you
552/// that you're not using the normal `self` keyword.
553///
554/// **All receiver types are supported:**
555/// - `self` (by value/move) → `_self: Type`
556/// - `&self` (shared reference) → `_self: &Type`
557/// - `&mut self` (mutable reference) → `_self: &mut Type`
558///
559/// # Multiple Trait Bounds
560///
561/// When using `impl Trait` or generic bounds with multiple traits,
562/// all required features are enabled:
563///
564/// ```ignore
565/// #[arcane]
566/// fn fma_kernel(token: impl HasX64V2 + HasNeon, data: &[f32; 8]) -> [f32; 8] {
567/// // Cross-platform: SSE4.2 on x86, NEON on ARM
568/// }
569/// ```
570///
571/// # Expansion
572///
573/// The macro expands to approximately:
574///
575/// ```ignore
576/// fn process(token: Avx2Token, data: &[f32; 8]) -> [f32; 8] {
577/// #[target_feature(enable = "avx2")]
578/// #[inline]
579/// fn __simd_inner_process(token: Avx2Token, data: &[f32; 8]) -> [f32; 8] {
580/// let v = unsafe { _mm256_loadu_ps(data.as_ptr()) };
581/// let doubled = _mm256_add_ps(v, v);
582/// let mut out = [0.0f32; 8];
583/// unsafe { _mm256_storeu_ps(out.as_mut_ptr(), doubled) };
584/// out
585/// }
586/// // SAFETY: Calling #[target_feature] fn from non-matching context.
587/// // Token proves the required features are available.
588/// unsafe { __simd_inner_process(token, data) }
589/// }
590/// ```
591///
592/// # Profile Tokens
593///
594/// Profile tokens automatically enable all required features:
595///
596/// ```ignore
597/// #[arcane]
598/// fn kernel(token: X64V3Token, data: &mut [f32]) {
599/// // AVX2 + FMA + BMI1 + BMI2 intrinsics all safe here!
600/// }
601/// ```
602///
603/// # Supported Tokens
604///
605/// - **x86_64 tiers**: `X64V2Token`, `X64V3Token` / `Desktop64` / `Avx2FmaToken`,
606/// `X64V4Token` / `Avx512Token` / `Server64`, `Avx512ModernToken`, `Avx512Fp16Token`
607/// - **ARM**: `NeonToken` / `Arm64`, `NeonAesToken`, `NeonSha3Token`, `NeonCrcToken`
608/// - **WASM**: `Wasm128Token`
609///
610/// # Supported Trait Bounds
611///
612/// - **x86_64 tiers**: `HasX64V2`, `HasX64V4`
613/// - **ARM**: `HasNeon`, `HasNeonAes`, `HasNeonSha3`
614///
615/// **Preferred:** Use concrete tokens (`X64V3Token`, `Desktop64`, `NeonToken`) directly.
616/// Concrete token types also work as trait bounds (e.g., `impl X64V3Token`).
617///
618/// # Options
619///
620/// ## `inline_always`
621///
622/// Use `#[inline(always)]` instead of `#[inline]` for the inner function.
623/// This can improve performance by ensuring aggressive inlining, but requires
624/// nightly Rust with `#![feature(target_feature_inline_always)]` enabled in
625/// the crate using the macro.
626///
627/// ```ignore
628/// #![feature(target_feature_inline_always)]
629///
630/// #[arcane(inline_always)]
631/// fn fast_kernel(token: Avx2Token, data: &mut [f32]) {
632/// // Inner function will use #[inline(always)]
633/// }
634/// ```
635#[proc_macro_attribute]
636pub fn arcane(attr: TokenStream, item: TokenStream) -> TokenStream {
637 let args = parse_macro_input!(attr as ArcaneArgs);
638 let input_fn = parse_macro_input!(item as ItemFn);
639 arcane_impl(input_fn, "arcane", args)
640}
641
642/// Legacy alias for [`arcane`].
643///
644/// **Deprecated:** Use `#[arcane]` instead. This alias exists only for migration.
645#[proc_macro_attribute]
646#[doc(hidden)]
647pub fn simd_fn(attr: TokenStream, item: TokenStream) -> TokenStream {
648 let args = parse_macro_input!(attr as ArcaneArgs);
649 let input_fn = parse_macro_input!(item as ItemFn);
650 arcane_impl(input_fn, "simd_fn", args)
651}
652
653// ============================================================================
654// Rite macro for inner SIMD functions (inlines into matching #[target_feature] callers)
655// ============================================================================
656
657/// Annotate inner SIMD helpers called from `#[arcane]` functions.
658///
659/// Unlike `#[arcane]`, which creates an inner `#[target_feature]` function behind
660/// a safe boundary, `#[rite]` adds `#[target_feature]` and `#[inline]` directly.
661/// LLVM inlines it into any caller with matching features — no boundary crossing.
662///
663/// # When to Use
664///
665/// Use `#[rite]` for helper functions that are **only** called from within
666/// `#[arcane]` functions with matching or superset token types:
667///
668/// ```ignore
669/// use archmage::{arcane, rite, X64V3Token};
670///
671/// #[arcane]
672/// fn outer(token: X64V3Token, data: &[f32; 8]) -> f32 {
673/// // helper inlines — same target features, no boundary
674/// helper(token, data) * 2.0
675/// }
676///
677/// #[rite]
678/// fn helper(token: X64V3Token, data: &[f32; 8]) -> f32 {
679/// // Just has #[target_feature(enable = "avx2,fma,...")]
680/// // Called from #[arcane] context, so features are guaranteed
681/// let v = f32x8::from_array(token, *data);
682/// v.reduce_add()
683/// }
684/// ```
685///
686/// # Safety
687///
688/// `#[rite]` functions can only be safely called from contexts where the
689/// required CPU features are enabled:
690/// - From within `#[arcane]` functions with matching/superset tokens
691/// - From within other `#[rite]` functions with matching/superset tokens
692/// - From code compiled with `-Ctarget-cpu` that enables the features
693///
694/// Calling from other contexts requires `unsafe` and the caller must ensure
695/// the CPU supports the required features.
696///
697/// # Comparison with #[arcane]
698///
699/// | Aspect | `#[arcane]` | `#[rite]` |
700/// |--------|-------------|-----------|
701/// | Creates wrapper | Yes | No |
702/// | Entry point | Yes | No |
703/// | Inlines into caller | No (barrier) | Yes |
704/// | Safe to call anywhere | Yes (with token) | Only from feature-enabled context |
705#[proc_macro_attribute]
706pub fn rite(attr: TokenStream, item: TokenStream) -> TokenStream {
707 // Parse optional arguments (currently just inline_always)
708 let args = parse_macro_input!(attr as RiteArgs);
709 let input_fn = parse_macro_input!(item as ItemFn);
710 rite_impl(input_fn, args)
711}
712
/// Parsed arguments of the `#[rite]` attribute.
///
/// Deliberately empty. An `inline_always` option is not offered because
/// `#[inline(always)]` combined with `#[target_feature]` requires nightly Rust;
/// the plain `#[inline]` hint is enough when the caller has matching
/// `#[target_feature]` attributes.
#[derive(Default)]
struct RiteArgs {
    // Intentionally no fields: there are no supported options on stable.
}
723
724impl Parse for RiteArgs {
725 fn parse(input: ParseStream) -> syn::Result<Self> {
726 if !input.is_empty() {
727 let ident: Ident = input.parse()?;
728 return Err(syn::Error::new(
729 ident.span(),
730 "#[rite] takes no arguments. Note: inline_always is not supported \
731 because #[inline(always)] + #[target_feature] requires nightly Rust.",
732 ));
733 }
734 Ok(RiteArgs::default())
735 }
736}
737
738/// Implementation for the `#[rite]` macro.
739fn rite_impl(mut input_fn: ItemFn, args: RiteArgs) -> TokenStream {
740 // Find the token parameter and its features
741 let TokenParamInfo {
742 features,
743 target_arch,
744 ..
745 } = match find_token_param(&input_fn.sig) {
746 Some(result) => result,
747 None => {
748 let msg = "rite requires a token parameter. Supported forms:\n\
749 - Concrete: `token: X64V3Token`\n\
750 - impl Trait: `token: impl HasX64V2`\n\
751 - Generic: `fn foo<T: HasX64V2>(token: T, ...)`";
752 return syn::Error::new_spanned(&input_fn.sig, msg)
753 .to_compile_error()
754 .into();
755 }
756 };
757
758 // Build target_feature attributes
759 let target_feature_attrs: Vec<Attribute> = features
760 .iter()
761 .map(|feature| parse_quote!(#[target_feature(enable = #feature)]))
762 .collect();
763
764 // Always use #[inline] - #[inline(always)] + #[target_feature] requires nightly
765 let _ = args; // RiteArgs is currently empty but kept for future extensibility
766 let inline_attr: Attribute = parse_quote!(#[inline]);
767
768 // Prepend attributes to the function
769 let mut new_attrs = target_feature_attrs;
770 new_attrs.push(inline_attr);
771 new_attrs.append(&mut input_fn.attrs);
772 input_fn.attrs = new_attrs;
773
774 // If we know the target arch, generate cfg-gated impl + stub
775 if let Some(arch) = target_arch {
776 let vis = &input_fn.vis;
777 let sig = &input_fn.sig;
778 let attrs = &input_fn.attrs;
779 let block = &input_fn.block;
780
781 quote! {
782 #[cfg(target_arch = #arch)]
783 #(#attrs)*
784 #vis #sig
785 #block
786
787 #[cfg(not(target_arch = #arch))]
788 #vis #sig {
789 unreachable!(concat!(
790 "This function requires ",
791 #arch,
792 " architecture"
793 ))
794 }
795 }
796 .into()
797 } else {
798 // No specific arch (trait bounds) - just emit the annotated function
799 quote!(#input_fn).into()
800 }
801}
802
803// =============================================================================
804// magetypes! macro - generate platform variants from generic function
805// =============================================================================
806
/// Describes one platform variant generated by `#[magetypes]`.
struct MagetypesVariant {
    /// Suffix appended to the function name (`process` -> `process_<suffix>`).
    suffix: &'static str,
    /// Path of the concrete token type substituted for `Token`.
    token_type: &'static str,
    /// `target_arch` the variant is gated on; `None` means always compiled.
    target_arch: Option<&'static str>,
    /// Cargo feature the variant is additionally gated on, if any.
    cargo_feature: Option<&'static str>,
}

impl MagetypesVariant {
    /// Variant compiled only for `arch`, with no cargo-feature gate.
    const fn on_arch(suffix: &'static str, token_type: &'static str, arch: &'static str) -> Self {
        MagetypesVariant {
            suffix,
            token_type,
            target_arch: Some(arch),
            cargo_feature: None,
        }
    }

    /// Variant compiled on every target, with no cargo-feature gate.
    const fn portable(suffix: &'static str, token_type: &'static str) -> Self {
        MagetypesVariant {
            suffix,
            token_type,
            target_arch: None,
            cargo_feature: None,
        }
    }

    /// Same variant, additionally gated on the cargo feature `feature`.
    const fn gated_by(self, feature: &'static str) -> Self {
        MagetypesVariant {
            suffix: self.suffix,
            token_type: self.token_type,
            target_arch: self.target_arch,
            cargo_feature: Some(feature),
        }
    }
}

/// Every variant `#[magetypes]` emits, in emission order.
const MAGETYPES_VARIANTS: &[MagetypesVariant] = &[
    // x86_64 V3 (AVX2)
    MagetypesVariant::on_arch("v3", "archmage::X64V3Token", "x86_64"),
    // x86_64 V4 (AVX-512)
    MagetypesVariant::on_arch("v4", "archmage::X64V4Token", "x86_64").gated_by("avx512"),
    // aarch64 NEON
    MagetypesVariant::on_arch("neon", "archmage::NeonToken", "aarch64"),
    // wasm32 SIMD128
    MagetypesVariant::on_arch("wasm128", "archmage::Wasm128Token", "wasm32"),
    // Scalar fallback, always available
    MagetypesVariant::portable("scalar", "archmage::ScalarToken"),
];
852
853/// Generate platform-specific variants from a function by replacing `Token`.
854///
855/// Use `Token` as a placeholder for the token type. The macro generates
856/// suffixed variants (`_v3`, `_v4`, `_neon`, `_wasm128`, `_scalar`) with
857/// `Token` replaced by the concrete token type, and each variant wrapped
858/// in the appropriate `#[cfg(target_arch = ...)]` guard.
859///
860/// # What gets replaced
861///
862/// **Only `Token`** is replaced — with the concrete token type for each variant
863/// (e.g., `archmage::X64V3Token`, `archmage::ScalarToken`). SIMD types like
864/// `f32x8` and constants like `LANES` are **not** replaced by this macro.
865///
866/// This means `#[magetypes]` works well for functions that only need the token
867/// (e.g., to pass to other functions), but not for functions that use
868/// platform-specific SIMD types directly. For those, write `_v3` and `_scalar`
869/// variants manually and use `incant!` for dispatch.
870///
871/// # Example
872///
873/// ```rust,ignore
874/// use archmage::magetypes;
875///
876/// // Works: function only uses Token, no SIMD types
877/// #[magetypes]
878/// fn process(token: Token, data: &[f32]) -> f32 {
879/// // delegates to other functions that handle SIMD internally
880/// inner_simd_work(token, data)
881/// }
882///
883/// // Generates:
884/// // - process_v3(token: X64V3Token, ...) — #[cfg(target_arch = "x86_64")]
/// // - process_v4(token: X64V4Token, ...) — #[cfg(all(target_arch = "x86_64", feature = "avx512"))]
886/// // - process_neon(token: NeonToken, ...) — #[cfg(target_arch = "aarch64")]
887/// // - process_wasm128(token: Wasm128Token, ...) — #[cfg(target_arch = "wasm32")]
888/// // - process_scalar(token: ScalarToken, ...) — always available
889/// ```
890///
891/// # Usage with incant!
892///
893/// The generated variants work with `incant!` for dispatch:
894///
895/// ```rust,ignore
896/// pub fn process_api(data: &[f32]) -> f32 {
897/// incant!(process(data))
898/// }
899/// ```
900#[proc_macro_attribute]
901pub fn magetypes(attr: TokenStream, item: TokenStream) -> TokenStream {
902 // Ignore attributes for now (could add variant selection later)
903 let _ = attr;
904 let input_fn = parse_macro_input!(item as ItemFn);
905 magetypes_impl(input_fn)
906}
907
908fn magetypes_impl(input_fn: ItemFn) -> TokenStream {
909 let fn_name = &input_fn.sig.ident;
910 let fn_attrs = &input_fn.attrs;
911
912 // Convert function to string for text substitution
913 let fn_str = input_fn.to_token_stream().to_string();
914
915 let mut variants = Vec::new();
916
917 for variant in MAGETYPES_VARIANTS {
918 // Create suffixed function name
919 let suffixed_name = format!("{}_{}", fn_name, variant.suffix);
920
921 // Do text substitution
922 let mut variant_str = fn_str.clone();
923
924 // Replace function name
925 variant_str = variant_str.replacen(&fn_name.to_string(), &suffixed_name, 1);
926
927 // Replace Token type with concrete token
928 variant_str = variant_str.replace("Token", variant.token_type);
929
930 // Parse back to tokens
931 let variant_tokens: proc_macro2::TokenStream = match variant_str.parse() {
932 Ok(t) => t,
933 Err(e) => {
934 return syn::Error::new_spanned(
935 &input_fn,
936 format!(
937 "Failed to parse generated variant `{}`: {}",
938 suffixed_name, e
939 ),
940 )
941 .to_compile_error()
942 .into();
943 }
944 };
945
946 // Add cfg guards
947 let cfg_guard = match (variant.target_arch, variant.cargo_feature) {
948 (Some(arch), Some(feature)) => {
949 quote! { #[cfg(all(target_arch = #arch, feature = #feature))] }
950 }
951 (Some(arch), None) => {
952 quote! { #[cfg(target_arch = #arch)] }
953 }
954 (None, Some(feature)) => {
955 quote! { #[cfg(feature = #feature)] }
956 }
957 (None, None) => {
958 quote! {} // No guard needed (scalar)
959 }
960 };
961
962 variants.push(quote! {
963 #cfg_guard
964 #variant_tokens
965 });
966 }
967
968 // Remove attributes from the list that should not be duplicated
969 let filtered_attrs: Vec<_> = fn_attrs
970 .iter()
971 .filter(|a| !a.path().is_ident("magetypes"))
972 .collect();
973
974 let output = quote! {
975 #(#filtered_attrs)*
976 #(#variants)*
977 };
978
979 output.into()
980}
981
982// =============================================================================
983// incant! macro - dispatch to platform-specific variants
984// =============================================================================
985
986/// Input for the incant! macro
987struct IncantInput {
988 /// Function name to call
989 func_name: Ident,
990 /// Arguments to pass
991 args: Vec<syn::Expr>,
992 /// Optional token variable for passthrough mode
993 with_token: Option<syn::Expr>,
994}
995
996impl Parse for IncantInput {
997 fn parse(input: ParseStream) -> syn::Result<Self> {
998 // Parse: function_name(arg1, arg2, ...) [with token_expr]
999 let func_name: Ident = input.parse()?;
1000
1001 // Parse parenthesized arguments
1002 let content;
1003 syn::parenthesized!(content in input);
1004 let args = content
1005 .parse_terminated(syn::Expr::parse, Token![,])?
1006 .into_iter()
1007 .collect();
1008
1009 // Check for optional "with token"
1010 let with_token = if input.peek(Ident) {
1011 let kw: Ident = input.parse()?;
1012 if kw != "with" {
1013 return Err(syn::Error::new_spanned(kw, "expected `with` keyword"));
1014 }
1015 Some(input.parse()?)
1016 } else {
1017 None
1018 };
1019
1020 Ok(IncantInput {
1021 func_name,
1022 args,
1023 with_token,
1024 })
1025 }
1026}
1027
1028/// Dispatch to platform-specific SIMD variants.
1029///
1030/// # Entry Point Mode (no token yet)
1031///
1032/// Summons tokens and dispatches to the best available variant:
1033///
1034/// ```rust,ignore
1035/// pub fn public_api(data: &[f32]) -> f32 {
1036/// incant!(dot(data))
1037/// }
1038/// ```
1039///
1040/// Expands to runtime feature detection + dispatch to `dot_v3`, `dot_v4`,
1041/// `dot_neon`, `dot_wasm128`, or `dot_scalar`.
1042///
1043/// # Passthrough Mode (already have token)
1044///
1045/// Uses compile-time dispatch via `IntoConcreteToken`:
1046///
1047/// ```rust,ignore
1048/// #[arcane]
1049/// fn outer(token: X64V3Token, data: &[f32]) -> f32 {
1050/// incant!(inner(data) with token)
1051/// }
1052/// ```
1053///
1054/// The compiler monomorphizes the dispatch, eliminating non-matching branches.
1055///
1056/// # Variant Naming
1057///
1058/// Functions must have suffixed variants:
1059/// - `_v3` for `X64V3Token`
1060/// - `_v4` for `X64V4Token` (requires `avx512` feature)
1061/// - `_neon` for `NeonToken`
1062/// - `_wasm128` for `Wasm128Token`
1063/// - `_scalar` for `ScalarToken`
1064#[proc_macro]
1065pub fn incant(input: TokenStream) -> TokenStream {
1066 let input = parse_macro_input!(input as IncantInput);
1067 incant_impl(input)
1068}
1069
1070/// Legacy alias for [`incant!`].
1071#[proc_macro]
1072pub fn simd_route(input: TokenStream) -> TokenStream {
1073 let input = parse_macro_input!(input as IncantInput);
1074 incant_impl(input)
1075}
1076
1077fn incant_impl(input: IncantInput) -> TokenStream {
1078 let func_name = &input.func_name;
1079 let args = &input.args;
1080
1081 // Create suffixed function names
1082 let fn_v3 = format_ident!("{}_v3", func_name);
1083 let fn_v4 = format_ident!("{}_v4", func_name);
1084 let fn_neon = format_ident!("{}_neon", func_name);
1085 let fn_wasm128 = format_ident!("{}_wasm128", func_name);
1086 let fn_scalar = format_ident!("{}_scalar", func_name);
1087
1088 // Use labeled blocks instead of `return` so incant! can be chained.
1089 // Labeled blocks are stable since Rust 1.65.
1090 if let Some(token_expr) = &input.with_token {
1091 // Passthrough mode: use IntoConcreteToken for compile-time dispatch
1092 let expanded = quote! {
1093 '__incant: {
1094 use archmage::IntoConcreteToken;
1095 let __incant_token = #token_expr;
1096
1097 #[cfg(target_arch = "x86_64")]
1098 {
1099 #[cfg(feature = "avx512")]
1100 if let Some(__t) = __incant_token.as_x64v4() {
1101 break '__incant #fn_v4(__t, #(#args),*);
1102 }
1103 if let Some(__t) = __incant_token.as_x64v3() {
1104 break '__incant #fn_v3(__t, #(#args),*);
1105 }
1106 }
1107
1108 #[cfg(target_arch = "aarch64")]
1109 if let Some(__t) = __incant_token.as_neon() {
1110 break '__incant #fn_neon(__t, #(#args),*);
1111 }
1112
1113 #[cfg(target_arch = "wasm32")]
1114 if let Some(__t) = __incant_token.as_wasm128() {
1115 break '__incant #fn_wasm128(__t, #(#args),*);
1116 }
1117
1118 if let Some(__t) = __incant_token.as_scalar() {
1119 break '__incant #fn_scalar(__t, #(#args),*);
1120 }
1121
1122 unreachable!("Token did not match any known variant")
1123 }
1124 };
1125 expanded.into()
1126 } else {
1127 // Entry point mode: summon tokens and dispatch
1128 let expanded = quote! {
1129 '__incant: {
1130 use archmage::SimdToken;
1131
1132 #[cfg(target_arch = "x86_64")]
1133 {
1134 #[cfg(feature = "avx512")]
1135 if let Some(__t) = archmage::X64V4Token::summon() {
1136 break '__incant #fn_v4(__t, #(#args),*);
1137 }
1138 if let Some(__t) = archmage::X64V3Token::summon() {
1139 break '__incant #fn_v3(__t, #(#args),*);
1140 }
1141 }
1142
1143 #[cfg(target_arch = "aarch64")]
1144 if let Some(__t) = archmage::NeonToken::summon() {
1145 break '__incant #fn_neon(__t, #(#args),*);
1146 }
1147
1148 #[cfg(target_arch = "wasm32")]
1149 if let Some(__t) = archmage::Wasm128Token::summon() {
1150 break '__incant #fn_wasm128(__t, #(#args),*);
1151 }
1152
1153 // Scalar fallback
1154 #fn_scalar(archmage::ScalarToken, #(#args),*)
1155 }
1156 };
1157 expanded.into()
1158 }
1159}
1160
1161// =============================================================================
1162// Unit tests for token/trait recognition maps
1163// =============================================================================
1164
#[cfg(test)]
mod tests {
    use super::*;

    use super::generated::{ALL_CONCRETE_TOKENS, ALL_TRAIT_NAMES};

    /// Each name in `ALL_CONCRETE_TOKENS` must resolve through `token_to_features`.
    #[test]
    fn every_concrete_token_is_in_token_to_features() {
        for name in ALL_CONCRETE_TOKENS.iter().copied() {
            let features = token_to_features(name);
            assert!(
                features.is_some(),
                "Token `{}` exists in runtime crate but is NOT recognized by \
                 token_to_features() in the proc macro. Add it!",
                name
            );
        }
    }

    /// Each name in `ALL_TRAIT_NAMES` must resolve through `trait_to_features`.
    #[test]
    fn every_trait_is_in_trait_to_features() {
        for name in ALL_TRAIT_NAMES.iter().copied() {
            let features = trait_to_features(name);
            assert!(
                features.is_some(),
                "Trait `{}` exists in runtime crate but is NOT recognized by \
                 trait_to_features() in the proc macro. Add it!",
                name
            );
        }
    }

    /// Alias names must yield exactly the same lookup result as the name they alias.
    #[test]
    fn token_aliases_map_to_same_features() {
        // (left, right) pairs that must produce identical lookups:
        //   Desktop64 = X64V3Token
        //   Server64 = X64V4Token = Avx512Token
        //   Arm64 = NeonToken
        const EQUIVALENT_PAIRS: &[(&str, &str)] = &[
            ("Desktop64", "X64V3Token"),
            ("Server64", "X64V4Token"),
            ("X64V4Token", "Avx512Token"),
            ("Arm64", "NeonToken"),
        ];

        for &(left, right) in EQUIVALENT_PAIRS {
            assert_eq!(
                token_to_features(left),
                token_to_features(right),
                "{} and {} should map to identical features",
                left,
                right
            );
        }
    }

    /// Token names must also be accepted by the trait-bound lookup.
    #[test]
    fn trait_to_features_includes_tokens_as_bounds() {
        // Tier tokens double as trait bounds from the macro's point of view
        // (`impl X64V3Token` is rejected by Rust itself, but the macro sees the
        // AST before type checking happens).
        const TIER_TOKENS: &[&str] = &[
            "X64V2Token",
            "X64V3Token",
            "Desktop64",
            "Avx2FmaToken",
            "X64V4Token",
            "Avx512Token",
            "Server64",
            "Avx512ModernToken",
            "Avx512Fp16Token",
            "NeonToken",
            "Arm64",
            "NeonAesToken",
            "NeonSha3Token",
            "NeonCrcToken",
        ];

        for name in TIER_TOKENS.iter().copied() {
            assert!(
                trait_to_features(name).is_some(),
                "Tier token `{}` should also be recognized in trait_to_features() \
                 for use as a generic bound. Add it!",
                name
            );
        }
    }

    /// `HasX64V4` must be a strict superset of `HasX64V2`.
    #[test]
    fn trait_features_are_cumulative() {
        let v2 = trait_to_features("HasX64V2").unwrap();
        let v4 = trait_to_features("HasX64V4").unwrap();

        // Every v2 feature has to show up in the v4 list.
        for feature in v2.iter() {
            assert!(
                v4.contains(feature),
                "HasX64V4 should include v2 feature `{}` but doesn't",
                feature
            );
        }

        // ...and v4 has to add something on top.
        assert!(
            v4.len() > v2.len(),
            "HasX64V4 should have more features than HasX64V2"
        );
    }

    /// `X64V3Token`, used as a trait bound, must cover every `HasX64V2` feature.
    #[test]
    fn x64v3_trait_features_include_v2() {
        let v2_features = trait_to_features("HasX64V2").unwrap();
        let v3_features = trait_to_features("X64V3Token").unwrap();

        for feature in v2_features.iter() {
            assert!(
                v3_features.contains(feature),
                "X64V3Token trait features should include v2 feature `{}` but don't",
                feature
            );
        }
    }

    /// `HasNeonAes` must cover every `HasNeon` feature.
    #[test]
    fn has_neon_aes_includes_neon() {
        let base = trait_to_features("HasNeon").unwrap();
        let with_aes = trait_to_features("HasNeonAes").unwrap();

        for feature in base.iter() {
            assert!(
                with_aes.contains(feature),
                "HasNeonAes should include NEON feature `{}`",
                feature
            );
        }
    }

    /// Trait names dropped in 0.3.0 must be rejected by `trait_to_features`.
    #[test]
    fn no_removed_traits_are_recognized() {
        const REMOVED_TRAITS: &[&str] = &[
            "HasSse",
            "HasSse2",
            "HasSse41",
            "HasSse42",
            "HasAvx",
            "HasAvx2",
            "HasFma",
            "HasAvx512f",
            "HasAvx512bw",
            "HasAvx512vl",
            "HasAvx512vbmi2",
            "HasSve",
            "HasSve2",
        ];

        for name in REMOVED_TRAITS.iter().copied() {
            assert!(
                trait_to_features(name).is_none(),
                "Removed trait `{}` should NOT be in trait_to_features(). \
                 It was removed in 0.3.0 — users should migrate to tier traits.",
                name
            );
        }
    }

    /// Token names that were never defined must be rejected by `token_to_features`.
    #[test]
    fn no_nonexistent_tokens_are_recognized() {
        const FAKE_TOKENS: &[&str] = &[
            "Sse2Token",
            "SveToken",
            "Sve2Token",
            "Avx512VnniToken",
            "X64V4ModernToken",
            "NeonFp16Token",
        ];

        for name in FAKE_TOKENS.iter().copied() {
            assert!(
                token_to_features(name).is_none(),
                "Non-existent token `{}` should NOT be in token_to_features()",
                name
            );
        }
    }
}