archmage_macros/lib.rs
1//! Proc-macros for archmage SIMD capability tokens.
2//!
//! Provides `#[arcane]` attribute (with `#[simd_fn]` alias) to make raw intrinsics
4//! safe via token proof.
5
6use proc_macro::TokenStream;
7use quote::{ToTokens, format_ident, quote};
8use syn::{
9 Attribute, FnArg, GenericParam, Ident, ItemFn, PatType, ReturnType, Signature, Token, Type,
10 TypeParamBound,
11 fold::Fold,
12 parse::{Parse, ParseStream},
13 parse_macro_input, parse_quote,
14};
15
/// A Fold implementation that replaces `Self` with a concrete type.
///
/// Used when expanding methods with self receivers: the generated inner
/// function is a free function, so any bare `Self` in the return type or body
/// must be rewritten to the concrete type supplied via `_self = Type`.
struct ReplaceSelf<'a> {
    /// The concrete type substituted for every bare `Self` path.
    replacement: &'a Type,
}
20
21impl Fold for ReplaceSelf<'_> {
22 fn fold_type(&mut self, ty: Type) -> Type {
23 match ty {
24 Type::Path(ref type_path) if type_path.qself.is_none() => {
25 // Check if it's just `Self`
26 if type_path.path.is_ident("Self") {
27 return self.replacement.clone();
28 }
29 // Otherwise continue folding
30 syn::fold::fold_type(self, ty)
31 }
32 _ => syn::fold::fold_type(self, ty),
33 }
34 }
35}
36
/// Arguments to the `#[arcane]` macro.
///
/// Parsed from the attribute's argument list by the `Parse` impl below.
#[derive(Default)]
struct ArcaneArgs {
    /// Use `#[inline(always)]` instead of `#[inline]` for the inner function.
    /// Requires nightly Rust with `#![feature(target_feature_inline_always)]`.
    inline_always: bool,
    /// The concrete type to use for `self` receiver.
    /// When specified, `self`/`&self`/`&mut self` is transformed to `_self: Type`/`&Type`/`&mut Type`.
    self_type: Option<Type>,
}
47
48impl Parse for ArcaneArgs {
49 fn parse(input: ParseStream) -> syn::Result<Self> {
50 let mut args = ArcaneArgs::default();
51
52 while !input.is_empty() {
53 let ident: Ident = input.parse()?;
54 match ident.to_string().as_str() {
55 "inline_always" => args.inline_always = true,
56 "_self" => {
57 let _: Token![=] = input.parse()?;
58 args.self_type = Some(input.parse()?);
59 }
60 other => {
61 return Err(syn::Error::new(
62 ident.span(),
63 format!("unknown arcane argument: `{}`", other),
64 ));
65 }
66 }
67 // Consume optional comma
68 if input.peek(Token![,]) {
69 let _: Token![,] = input.parse()?;
70 }
71 }
72
73 Ok(args)
74 }
75}
76
77// Token-to-features and trait-to-features mappings are generated from
78// token-registry.toml by xtask. Regenerate with: cargo run -p xtask -- generate
79mod generated;
80use generated::{token_to_arch, token_to_features, trait_to_features};
81
/// Result of extracting token info from a type.
///
/// Produced by `extract_token_type_info` from a parameter's type.
enum TokenTypeInfo {
    /// Concrete token type (e.g., `Avx2Token`)
    Concrete(String),
    /// impl Trait with the trait names (e.g., `impl HasX64V2`)
    ImplTrait(Vec<String>),
    /// Generic type parameter name (e.g., `T`); also how a bare unknown path
    /// (anything not in the token registry) is classified.
    Generic(String),
}
91
92/// Extract token type information from a type.
93fn extract_token_type_info(ty: &Type) -> Option<TokenTypeInfo> {
94 match ty {
95 Type::Path(type_path) => {
96 // Get the last segment of the path (e.g., "Avx2Token" from "archmage::Avx2Token")
97 type_path.path.segments.last().map(|seg| {
98 let name = seg.ident.to_string();
99 // Check if it's a known concrete token type
100 if token_to_features(&name).is_some() {
101 TokenTypeInfo::Concrete(name)
102 } else {
103 // Might be a generic type parameter like `T`
104 TokenTypeInfo::Generic(name)
105 }
106 })
107 }
108 Type::Reference(type_ref) => {
109 // Handle &Token or &mut Token
110 extract_token_type_info(&type_ref.elem)
111 }
112 Type::ImplTrait(impl_trait) => {
113 // Handle `impl HasX64V2` or `impl HasX64V2 + HasNeon`
114 let traits: Vec<String> = extract_trait_names_from_bounds(&impl_trait.bounds);
115 if traits.is_empty() {
116 None
117 } else {
118 Some(TokenTypeInfo::ImplTrait(traits))
119 }
120 }
121 _ => None,
122 }
123}
124
125/// Extract trait names from type param bounds.
126fn extract_trait_names_from_bounds(
127 bounds: &syn::punctuated::Punctuated<TypeParamBound, Token![+]>,
128) -> Vec<String> {
129 bounds
130 .iter()
131 .filter_map(|bound| {
132 if let TypeParamBound::Trait(trait_bound) = bound {
133 trait_bound
134 .path
135 .segments
136 .last()
137 .map(|seg| seg.ident.to_string())
138 } else {
139 None
140 }
141 })
142 .collect()
143}
144
145/// Look up a generic type parameter in the function's generics.
146fn find_generic_bounds(sig: &Signature, type_name: &str) -> Option<Vec<String>> {
147 // Check inline bounds first (e.g., `fn foo<T: HasX64V2>(token: T)`)
148 for param in &sig.generics.params {
149 if let GenericParam::Type(type_param) = param
150 && type_param.ident == type_name
151 {
152 let traits = extract_trait_names_from_bounds(&type_param.bounds);
153 if !traits.is_empty() {
154 return Some(traits);
155 }
156 }
157 }
158
159 // Check where clause (e.g., `fn foo<T>(token: T) where T: HasX64V2`)
160 if let Some(where_clause) = &sig.generics.where_clause {
161 for predicate in &where_clause.predicates {
162 if let syn::WherePredicate::Type(pred_type) = predicate
163 && let Type::Path(type_path) = &pred_type.bounded_ty
164 && let Some(seg) = type_path.path.segments.last()
165 && seg.ident == type_name
166 {
167 let traits = extract_trait_names_from_bounds(&pred_type.bounds);
168 if !traits.is_empty() {
169 return Some(traits);
170 }
171 }
172 }
173 }
174
175 None
176}
177
178/// Convert trait names to features, collecting all features from all traits.
179fn traits_to_features(trait_names: &[String]) -> Option<Vec<&'static str>> {
180 let mut all_features = Vec::new();
181
182 for trait_name in trait_names {
183 if let Some(features) = trait_to_features(trait_name) {
184 for &feature in features {
185 if !all_features.contains(&feature) {
186 all_features.push(feature);
187 }
188 }
189 }
190 }
191
192 if all_features.is_empty() {
193 None
194 } else {
195 Some(all_features)
196 }
197}
198
/// Trait names that don't map to any CPU features. These are valid in the type
/// system but cannot be used as token bounds in `#[arcane]`/`#[rite]` because
/// the macros need concrete features to generate `#[target_feature]` attributes.
// NOTE(review): presumably this list mirrors the umbrella traits exported by
// the main archmage crate — verify it stays in sync when traits are added.
const FEATURELESS_TRAIT_NAMES: &[&str] = &["SimdToken", "IntoConcreteToken"];
203
204/// Check if any trait names are featureless (no CPU feature mapping).
205/// Returns the first featureless trait name found.
206fn find_featureless_trait(trait_names: &[String]) -> Option<&'static str> {
207 for name in trait_names {
208 for &featureless in FEATURELESS_TRAIT_NAMES {
209 if name == featureless {
210 return Some(featureless);
211 }
212 }
213 }
214 None
215}
216
217/// Diagnose why `find_token_param` failed. Returns the name of a featureless
218/// trait if the signature has a parameter bounded by one (e.g., `SimdToken`).
219fn diagnose_featureless_token(sig: &Signature) -> Option<&'static str> {
220 for arg in &sig.inputs {
221 if let FnArg::Typed(PatType { ty, .. }) = arg
222 && let Some(info) = extract_token_type_info(ty)
223 {
224 match &info {
225 TokenTypeInfo::ImplTrait(names) => {
226 if let Some(name) = find_featureless_trait(names) {
227 return Some(name);
228 }
229 }
230 TokenTypeInfo::Generic(type_name) => {
231 // Check if the type name itself is a featureless trait
232 // (e.g., `token: SimdToken` used as a bare path)
233 let as_vec = vec![type_name.clone()];
234 if let Some(name) = find_featureless_trait(&as_vec) {
235 return Some(name);
236 }
237 // Check generic bounds (e.g., `T: SimdToken`)
238 if let Some(bounds) = find_generic_bounds(sig, type_name)
239 && let Some(name) = find_featureless_trait(&bounds)
240 {
241 return Some(name);
242 }
243 }
244 TokenTypeInfo::Concrete(_) => {}
245 }
246 }
247 }
248 None
249}
250
/// Result of finding a token parameter in a function signature.
///
/// Produced by `find_token_param`; consumed by `arcane_impl` and `rite_impl`
/// to build `#[target_feature]` attributes and arch-gated stubs.
struct TokenParamInfo {
    /// The parameter identifier (e.g., `token`)
    ident: Ident,
    /// Target features to enable (e.g., `["avx2", "fma"]`)
    features: Vec<&'static str>,
    /// Target architecture (Some for concrete tokens, None for traits/generics)
    target_arch: Option<&'static str>,
    /// Concrete token type name (Some for concrete tokens, None for traits/generics)
    token_type_name: Option<String>,
}
262
263/// Find the first token parameter in a function signature.
264fn find_token_param(sig: &Signature) -> Option<TokenParamInfo> {
265 for arg in &sig.inputs {
266 match arg {
267 FnArg::Receiver(_) => {
268 // Self receivers (self, &self, &mut self) are not yet supported.
269 // The macro creates an inner function, and Rust's inner functions
270 // cannot have `self` parameters. Supporting this would require
271 // AST rewriting to replace `self` with a regular parameter.
272 // See the module docs for the workaround.
273 continue;
274 }
275 FnArg::Typed(PatType { pat, ty, .. }) => {
276 if let Some(info) = extract_token_type_info(ty) {
277 let (features, arch, token_name) = match info {
278 TokenTypeInfo::Concrete(ref name) => {
279 let features = token_to_features(name).map(|f| f.to_vec());
280 let arch = token_to_arch(name);
281 (features, arch, Some(name.clone()))
282 }
283 TokenTypeInfo::ImplTrait(trait_names) => {
284 (traits_to_features(&trait_names), None, None)
285 }
286 TokenTypeInfo::Generic(type_name) => {
287 // Look up the generic parameter's bounds
288 let features = find_generic_bounds(sig, &type_name)
289 .and_then(|traits| traits_to_features(&traits));
290 (features, None, None)
291 }
292 };
293
294 if let Some(features) = features {
295 // Extract parameter name
296 if let syn::Pat::Ident(pat_ident) = pat.as_ref() {
297 return Some(TokenParamInfo {
298 ident: pat_ident.ident.clone(),
299 features,
300 target_arch: arch,
301 token_type_name: token_name,
302 });
303 }
304 }
305 }
306 }
307 }
308 }
309 None
310}
311
/// Represents the kind of self receiver and the transformed parameter.
///
/// Maps to the generated inner-function parameter:
/// `self` → `_self: Type`, `&self` → `_self: &Type`,
/// `&mut self` → `_self: &mut Type`.
enum SelfReceiver {
    /// `self` (by value/move)
    Owned,
    /// `&self` (shared reference)
    Ref,
    /// `&mut self` (mutable reference)
    RefMut,
}
321
/// Shared implementation for the `#[arcane]` attribute and its `#[simd_fn]` alias.
///
/// Expands the annotated function into an outer safe wrapper that calls an
/// inner `#[target_feature]`-annotated function. For concrete tokens the
/// expansion is additionally cfg-gated per architecture, with an
/// `unreachable!` stub on other architectures. `macro_name` is used only in
/// error messages so both spellings report themselves correctly.
fn arcane_impl(input_fn: ItemFn, macro_name: &str, args: ArcaneArgs) -> TokenStream {
    // Check for self receiver
    let has_self_receiver = input_fn
        .sig
        .inputs
        .first()
        .map(|arg| matches!(arg, FnArg::Receiver(_)))
        .unwrap_or(false);

    // If there's a self receiver, we need _self = Type
    if has_self_receiver && args.self_type.is_none() {
        let msg = format!(
            "{} with self receiver requires `_self = Type` argument.\n\
             Example: #[{}(_self = MyType)]\n\
             Use `_self` (not `self`) in the function body to refer to self.",
            macro_name, macro_name
        );
        return syn::Error::new_spanned(&input_fn.sig, msg)
            .to_compile_error()
            .into();
    }

    // Find the token parameter, its features, target arch, and token type name
    let TokenParamInfo {
        ident: _token_ident,
        features,
        target_arch,
        token_type_name,
    } = match find_token_param(&input_fn.sig) {
        Some(result) => result,
        None => {
            // Check for specific misuse: featureless traits like SimdToken
            if let Some(trait_name) = diagnose_featureless_token(&input_fn.sig) {
                let msg = format!(
                    "`{trait_name}` cannot be used as a token bound in #[{macro_name}] \
                     because it doesn't specify any CPU features.\n\
                     \n\
                     #[{macro_name}] needs concrete features to generate #[target_feature]. \
                     Use a concrete token or a feature trait:\n\
                     \n\
                     Concrete tokens: X64V3Token, Desktop64, NeonToken, Arm64V2Token, ...\n\
                     Feature traits: impl HasX64V2, impl HasNeon, impl HasArm64V3, ..."
                );
                return syn::Error::new_spanned(&input_fn.sig, msg)
                    .to_compile_error()
                    .into();
            }
            let msg = format!(
                "{} requires a token parameter. Supported forms:\n\
                 - Concrete: `token: X64V3Token`\n\
                 - impl Trait: `token: impl HasX64V2`\n\
                 - Generic: `fn foo<T: HasX64V2>(token: T, ...)`\n\
                 - With self: `#[{}(_self = Type)] fn method(&self, token: impl HasNeon, ...)`",
                macro_name, macro_name
            );
            return syn::Error::new_spanned(&input_fn.sig, msg)
                .to_compile_error()
                .into();
        }
    };

    // Build target_feature attributes, one per feature
    let target_feature_attrs: Vec<Attribute> = features
        .iter()
        .map(|feature| parse_quote!(#[target_feature(enable = #feature)]))
        .collect();

    // Extract function components
    let vis = &input_fn.vis;
    let sig = &input_fn.sig;
    let fn_name = &sig.ident;
    let generics = &sig.generics;
    let where_clause = &generics.where_clause;
    let inputs = &sig.inputs;
    let output = &sig.output;
    let body = &input_fn.block;
    let attrs = &input_fn.attrs;

    // Determine self receiver type if present
    let self_receiver_kind: Option<SelfReceiver> = inputs.first().and_then(|arg| match arg {
        FnArg::Receiver(receiver) => {
            if receiver.reference.is_none() {
                Some(SelfReceiver::Owned)
            } else if receiver.mutability.is_some() {
                Some(SelfReceiver::RefMut)
            } else {
                Some(SelfReceiver::Ref)
            }
        }
        _ => None,
    });

    // Build inner function parameters, transforming self if needed
    let inner_params: Vec<proc_macro2::TokenStream> = inputs
        .iter()
        .map(|arg| match arg {
            FnArg::Receiver(_) => {
                // Transform self receiver to _self parameter.
                // Both unwraps are safe: a receiver without `_self = Type`
                // was rejected above, and self_receiver_kind is Some
                // whenever the first input is a receiver.
                let self_ty = args.self_type.as_ref().unwrap();
                match self_receiver_kind.as_ref().unwrap() {
                    SelfReceiver::Owned => quote!(_self: #self_ty),
                    SelfReceiver::Ref => quote!(_self: &#self_ty),
                    SelfReceiver::RefMut => quote!(_self: &mut #self_ty),
                }
            }
            FnArg::Typed(pat_type) => quote!(#pat_type),
        })
        .collect();

    // Build inner function call arguments. Non-identifier patterns (e.g.
    // destructuring) are silently skipped here.
    let inner_args: Vec<proc_macro2::TokenStream> = inputs
        .iter()
        .filter_map(|arg| match arg {
            FnArg::Typed(pat_type) => {
                if let syn::Pat::Ident(pat_ident) = pat_type.pat.as_ref() {
                    let ident = &pat_ident.ident;
                    Some(quote!(#ident))
                } else {
                    None
                }
            }
            FnArg::Receiver(_) => Some(quote!(self)), // Pass self to inner as _self
        })
        .collect();

    let inner_fn_name = format_ident!("__simd_inner_{}", fn_name);

    // Choose inline attribute based on args
    // Note: #[inline(always)] + #[target_feature] requires nightly with
    // #![feature(target_feature_inline_always)]
    let inline_attr: Attribute = if args.inline_always {
        parse_quote!(#[inline(always)])
    } else {
        parse_quote!(#[inline])
    };

    // Transform output and body to replace Self with concrete type if needed
    // (the inner fn is a free function, so `Self` would not resolve there)
    let (inner_output, inner_body): (ReturnType, syn::Block) =
        if let Some(ref self_ty) = args.self_type {
            let mut replacer = ReplaceSelf {
                replacement: self_ty,
            };
            let transformed_output = replacer.fold_return_type(output.clone());
            let transformed_body = replacer.fold_block((**body).clone());
            (transformed_output, transformed_body)
        } else {
            (output.clone(), (**body).clone())
        };

    // Generate the expanded function
    // If we know the target arch (concrete token), generate cfg-gated real impl + stub
    let token_type_str = token_type_name.as_deref().unwrap_or("UnknownToken");
    let expanded = if let Some(arch) = target_arch {
        quote! {
            // Real implementation for the correct architecture
            #[cfg(target_arch = #arch)]
            #(#attrs)*
            #vis #sig {
                #(#target_feature_attrs)*
                #inline_attr
                fn #inner_fn_name #generics (#(#inner_params),*) #inner_output #where_clause
                #inner_body

                // SAFETY: The token parameter proves the required CPU features are available.
                // Calling a #[target_feature] function from a non-matching context requires
                // unsafe because the CPU may not support those instructions. The token's
                // existence proves summon() succeeded, so the features are available.
                unsafe { #inner_fn_name(#(#inner_args),*) }
            }

            // Stub for other architectures - the token cannot be obtained, so this is unreachable
            #[cfg(not(target_arch = #arch))]
            #(#attrs)*
            #vis #sig {
                // This token type cannot be summoned on this architecture.
                // If you're seeing this at runtime, there's a bug in dispatch logic
                // or forge_token_dangerously() was used incorrectly.
                let _ = (#(#inner_args),*); // suppress unused warnings
                unreachable!(
                    "BUG: {}() was called but requires {} (target_arch = \"{}\"). \
                    {}::summon() returns None on this architecture, so this function \
                    is unreachable in safe code. If you used forge_token_dangerously(), \
                    that is the bug.",
                    stringify!(#fn_name),
                    #token_type_str,
                    #arch,
                    #token_type_str,
                )
            }
        }
    } else {
        // No specific arch (trait bounds or generic) - generate without cfg guards
        quote! {
            #(#attrs)*
            #vis #sig {
                #(#target_feature_attrs)*
                #inline_attr
                fn #inner_fn_name #generics (#(#inner_params),*) #inner_output #where_clause
                #inner_body

                // SAFETY: Calling a #[target_feature] function from a non-matching context
                // requires unsafe. The token proves the required CPU features are available.
                unsafe { #inner_fn_name(#(#inner_args),*) }
            }
        }
    };

    expanded.into()
}
532
533/// Mark a function as an arcane SIMD function.
534///
535/// This macro enables safe use of SIMD intrinsics by generating an inner function
536/// with the appropriate `#[target_feature(enable = "...")]` attributes based on
537/// the token parameter type. The outer function calls the inner function unsafely,
538/// which is justified because the token parameter proves the features are available.
539///
540/// **The token is passed through to the inner function**, so you can call other
541/// token-taking functions from inside `#[arcane]`.
542///
543/// # Token Parameter Forms
544///
545/// The macro supports four forms of token parameters:
546///
547/// ## Concrete Token Types
548///
549/// ```ignore
550/// #[arcane]
551/// fn process(token: Avx2Token, data: &[f32; 8]) -> [f32; 8] {
552/// // AVX2 intrinsics safe here
553/// }
554/// ```
555///
556/// ## impl Trait Bounds
557///
558/// ```ignore
559/// #[arcane]
560/// fn process(token: impl HasX64V2, data: &[f32; 8]) -> [f32; 8] {
561/// // Accepts any token with x86-64-v2 features (SSE4.2+)
562/// }
563/// ```
564///
565/// ## Generic Type Parameters
566///
567/// ```ignore
568/// #[arcane]
569/// fn process<T: HasX64V2>(token: T, data: &[f32; 8]) -> [f32; 8] {
570/// // Generic over any v2-capable token
571/// }
572///
573/// // Also works with where clauses:
574/// #[arcane]
575/// fn process<T>(token: T, data: &[f32; 8]) -> [f32; 8]
576/// where
577/// T: HasX64V2
578/// {
579/// // ...
580/// }
581/// ```
582///
583/// ## Methods with Self Receivers
584///
585/// Methods with `self`, `&self`, `&mut self` receivers are supported via the
586/// `_self = Type` argument. Use `_self` in the function body instead of `self`:
587///
588/// ```ignore
589/// use archmage::{X64V3Token, arcane};
590/// use wide::f32x8;
591///
592/// trait SimdOps {
593/// fn double(&self, token: X64V3Token) -> Self;
594/// fn square(self, token: X64V3Token) -> Self;
595/// fn scale(&mut self, token: X64V3Token, factor: f32);
596/// }
597///
598/// impl SimdOps for f32x8 {
599/// #[arcane(_self = f32x8)]
600/// fn double(&self, _token: X64V3Token) -> Self {
601/// // Use _self instead of self in the body
602/// *_self + *_self
603/// }
604///
605/// #[arcane(_self = f32x8)]
606/// fn square(self, _token: X64V3Token) -> Self {
607/// _self * _self
608/// }
609///
610/// #[arcane(_self = f32x8)]
611/// fn scale(&mut self, _token: X64V3Token, factor: f32) {
612/// *_self = *_self * f32x8::splat(factor);
613/// }
614/// }
615/// ```
616///
617/// **Why `_self`?** The macro generates an inner function where `self` becomes
618/// a regular parameter named `_self`. Using `_self` in your code reminds you
619/// that you're not using the normal `self` keyword.
620///
621/// **All receiver types are supported:**
622/// - `self` (by value/move) → `_self: Type`
623/// - `&self` (shared reference) → `_self: &Type`
624/// - `&mut self` (mutable reference) → `_self: &mut Type`
625///
626/// # Multiple Trait Bounds
627///
628/// When using `impl Trait` or generic bounds with multiple traits,
629/// all required features are enabled:
630///
631/// ```ignore
632/// #[arcane]
633/// fn fma_kernel(token: impl HasX64V2 + HasNeon, data: &[f32; 8]) -> [f32; 8] {
634/// // Cross-platform: SSE4.2 on x86, NEON on ARM
635/// }
636/// ```
637///
638/// # Expansion
639///
640/// The macro expands to approximately:
641///
642/// ```ignore
643/// fn process(token: Avx2Token, data: &[f32; 8]) -> [f32; 8] {
644/// #[target_feature(enable = "avx2")]
645/// #[inline]
646/// fn __simd_inner_process(token: Avx2Token, data: &[f32; 8]) -> [f32; 8] {
647/// let v = unsafe { _mm256_loadu_ps(data.as_ptr()) };
648/// let doubled = _mm256_add_ps(v, v);
649/// let mut out = [0.0f32; 8];
650/// unsafe { _mm256_storeu_ps(out.as_mut_ptr(), doubled) };
651/// out
652/// }
653/// // SAFETY: Calling #[target_feature] fn from non-matching context.
654/// // Token proves the required features are available.
655/// unsafe { __simd_inner_process(token, data) }
656/// }
657/// ```
658///
659/// # Profile Tokens
660///
661/// Profile tokens automatically enable all required features:
662///
663/// ```ignore
664/// #[arcane]
665/// fn kernel(token: X64V3Token, data: &mut [f32]) {
666/// // AVX2 + FMA + BMI1 + BMI2 intrinsics all safe here!
667/// }
668/// ```
669///
670/// # Supported Tokens
671///
672/// - **x86_64 tiers**: `X64V2Token`, `X64V3Token` / `Desktop64` / `Avx2FmaToken`,
673/// `X64V4Token` / `Avx512Token` / `Server64`, `X64V4xToken`, `Avx512Fp16Token`
674/// - **ARM**: `NeonToken` / `Arm64`, `Arm64V2Token`, `Arm64V3Token`,
675/// `NeonAesToken`, `NeonSha3Token`, `NeonCrcToken`
676/// - **WASM**: `Wasm128Token`
677///
678/// # Supported Trait Bounds
679///
680/// - **x86_64 tiers**: `HasX64V2`, `HasX64V4`
681/// - **ARM**: `HasNeon`, `HasNeonAes`, `HasNeonSha3`, `HasArm64V2`, `HasArm64V3`
682///
683/// **Preferred:** Use concrete tokens (`X64V3Token`, `Desktop64`, `NeonToken`) directly.
684/// Concrete token types also work as trait bounds (e.g., `impl X64V3Token`).
685///
686/// **Not supported:** `SimdToken` and `IntoConcreteToken` cannot be used as token
687/// bounds because they don't map to any CPU features. The macro needs concrete
688/// features to generate `#[target_feature]` attributes.
689///
690/// # Options
691///
692/// ## `inline_always`
693///
694/// Use `#[inline(always)]` instead of `#[inline]` for the inner function.
695/// This can improve performance by ensuring aggressive inlining, but requires
696/// nightly Rust with `#![feature(target_feature_inline_always)]` enabled in
697/// the crate using the macro.
698///
699/// ```ignore
700/// #![feature(target_feature_inline_always)]
701///
702/// #[arcane(inline_always)]
703/// fn fast_kernel(token: Avx2Token, data: &mut [f32]) {
704/// // Inner function will use #[inline(always)]
705/// }
706/// ```
707#[proc_macro_attribute]
708pub fn arcane(attr: TokenStream, item: TokenStream) -> TokenStream {
709 let args = parse_macro_input!(attr as ArcaneArgs);
710 let input_fn = parse_macro_input!(item as ItemFn);
711 arcane_impl(input_fn, "arcane", args)
712}
713
714/// Legacy alias for [`arcane`].
715///
716/// **Deprecated:** Use `#[arcane]` instead. This alias exists only for migration.
717#[proc_macro_attribute]
718#[doc(hidden)]
719pub fn simd_fn(attr: TokenStream, item: TokenStream) -> TokenStream {
720 let args = parse_macro_input!(attr as ArcaneArgs);
721 let input_fn = parse_macro_input!(item as ItemFn);
722 arcane_impl(input_fn, "simd_fn", args)
723}
724
725// ============================================================================
726// Rite macro for inner SIMD functions (inlines into matching #[target_feature] callers)
727// ============================================================================
728
729/// Annotate inner SIMD helpers called from `#[arcane]` functions.
730///
731/// Unlike `#[arcane]`, which creates an inner `#[target_feature]` function behind
732/// a safe boundary, `#[rite]` adds `#[target_feature]` and `#[inline]` directly.
733/// LLVM inlines it into any caller with matching features — no boundary crossing.
734///
735/// # When to Use
736///
737/// Use `#[rite]` for helper functions that are **only** called from within
738/// `#[arcane]` functions with matching or superset token types:
739///
740/// ```ignore
741/// use archmage::{arcane, rite, X64V3Token};
742///
743/// #[arcane]
744/// fn outer(token: X64V3Token, data: &[f32; 8]) -> f32 {
745/// // helper inlines — same target features, no boundary
746/// helper(token, data) * 2.0
747/// }
748///
749/// #[rite]
750/// fn helper(token: X64V3Token, data: &[f32; 8]) -> f32 {
751/// // Just has #[target_feature(enable = "avx2,fma,...")]
752/// // Called from #[arcane] context, so features are guaranteed
753/// let v = f32x8::from_array(token, *data);
754/// v.reduce_add()
755/// }
756/// ```
757///
758/// # Safety
759///
760/// `#[rite]` functions can only be safely called from contexts where the
761/// required CPU features are enabled:
762/// - From within `#[arcane]` functions with matching/superset tokens
763/// - From within other `#[rite]` functions with matching/superset tokens
764/// - From code compiled with `-Ctarget-cpu` that enables the features
765///
766/// Calling from other contexts requires `unsafe` and the caller must ensure
767/// the CPU supports the required features.
768///
769/// # Comparison with #[arcane]
770///
771/// | Aspect | `#[arcane]` | `#[rite]` |
772/// |--------|-------------|-----------|
773/// | Creates wrapper | Yes | No |
774/// | Entry point | Yes | No |
775/// | Inlines into caller | No (barrier) | Yes |
776/// | Safe to call anywhere | Yes (with token) | Only from feature-enabled context |
777#[proc_macro_attribute]
778pub fn rite(attr: TokenStream, item: TokenStream) -> TokenStream {
779 // Parse optional arguments (currently just inline_always)
780 let args = parse_macro_input!(attr as RiteArgs);
781 let input_fn = parse_macro_input!(item as ItemFn);
782 rite_impl(input_fn, args)
783}
784
/// Arguments for the `#[rite]` macro.
///
/// Currently empty - `#[inline(always)]` is not supported because
/// `#[inline(always)]` + `#[target_feature]` requires nightly Rust.
/// The regular `#[inline]` hint is sufficient when called from
/// matching `#[target_feature]` contexts.
#[derive(Default)]
struct RiteArgs {
    // No options currently - inline_always doesn't work on stable.
    // The struct is kept (rather than dropping the args parse entirely)
    // so future options can be added without changing the macro surface.
}
795
796impl Parse for RiteArgs {
797 fn parse(input: ParseStream) -> syn::Result<Self> {
798 if !input.is_empty() {
799 let ident: Ident = input.parse()?;
800 return Err(syn::Error::new(
801 ident.span(),
802 "#[rite] takes no arguments. Note: inline_always is not supported \
803 because #[inline(always)] + #[target_feature] requires nightly Rust.",
804 ));
805 }
806 Ok(RiteArgs::default())
807 }
808}
809
/// Implementation for the `#[rite]` macro.
///
/// Prepends `#[target_feature(enable = ...)]` and `#[inline]` to the function
/// in place (no wrapper is generated, unlike `#[arcane]`). For concrete
/// tokens the function is additionally cfg-gated to its architecture, with an
/// `unreachable!` stub emitted for all other architectures.
fn rite_impl(mut input_fn: ItemFn, args: RiteArgs) -> TokenStream {
    // Find the token parameter and its features
    let TokenParamInfo {
        features,
        target_arch,
        ..
    } = match find_token_param(&input_fn.sig) {
        Some(result) => result,
        None => {
            // Check for specific misuse: featureless traits like SimdToken
            if let Some(trait_name) = diagnose_featureless_token(&input_fn.sig) {
                let msg = format!(
                    "`{trait_name}` cannot be used as a token bound in #[rite] \
                     because it doesn't specify any CPU features.\n\
                     \n\
                     #[rite] needs concrete features to generate #[target_feature]. \
                     Use a concrete token or a feature trait:\n\
                     \n\
                     Concrete tokens: X64V3Token, Desktop64, NeonToken, Arm64V2Token, ...\n\
                     Feature traits: impl HasX64V2, impl HasNeon, impl HasArm64V3, ..."
                );
                return syn::Error::new_spanned(&input_fn.sig, msg)
                    .to_compile_error()
                    .into();
            }
            let msg = "rite requires a token parameter. Supported forms:\n\
                       - Concrete: `token: X64V3Token`\n\
                       - impl Trait: `token: impl HasX64V2`\n\
                       - Generic: `fn foo<T: HasX64V2>(token: T, ...)`";
            return syn::Error::new_spanned(&input_fn.sig, msg)
                .to_compile_error()
                .into();
        }
    };

    // Build target_feature attributes
    let target_feature_attrs: Vec<Attribute> = features
        .iter()
        .map(|feature| parse_quote!(#[target_feature(enable = #feature)]))
        .collect();

    // Always use #[inline] - #[inline(always)] + #[target_feature] requires nightly
    let _ = args; // RiteArgs is currently empty but kept for future extensibility
    let inline_attr: Attribute = parse_quote!(#[inline]);

    // Prepend attributes to the function (target_feature first, then inline,
    // then any user-written attributes)
    let mut new_attrs = target_feature_attrs;
    new_attrs.push(inline_attr);
    new_attrs.append(&mut input_fn.attrs);
    input_fn.attrs = new_attrs;

    // If we know the target arch, generate cfg-gated impl + stub
    if let Some(arch) = target_arch {
        let vis = &input_fn.vis;
        let sig = &input_fn.sig;
        let attrs = &input_fn.attrs;
        let block = &input_fn.block;

        quote! {
            #[cfg(target_arch = #arch)]
            #(#attrs)*
            #vis #sig
            #block

            // The stub deliberately omits #(#attrs)*: emitting the
            // #[target_feature] attrs here would be invalid on the other
            // architecture. NOTE(review): this also drops the user's own
            // attributes (doc comments, #[must_use], ...) on the stub, and
            // leaves the stub's parameters unused (possible warnings on
            // non-matching arch builds) — confirm whether that's intended.
            #[cfg(not(target_arch = #arch))]
            #vis #sig {
                unreachable!(concat!(
                    "This function requires ",
                    #arch,
                    " architecture"
                ))
            }
        }
        .into()
    } else {
        // No specific arch (trait bounds) - just emit the annotated function
        quote!(#input_fn).into()
    }
}
890
891// =============================================================================
892// magetypes! macro - generate platform variants from generic function
893// =============================================================================
894
895/// Generate platform-specific variants from a function by replacing `Token`.
896///
897/// Use `Token` as a placeholder for the token type. The macro generates
898/// suffixed variants with `Token` replaced by the concrete token type, and
899/// each variant wrapped in the appropriate `#[cfg(target_arch = ...)]` guard.
900///
901/// # Default tiers
902///
903/// Without arguments, generates `_v3`, `_v4`, `_neon`, `_wasm128`, `_scalar`:
904///
905/// ```rust,ignore
906/// #[magetypes]
907/// fn process(token: Token, data: &[f32]) -> f32 {
908/// inner_simd_work(token, data)
909/// }
910/// ```
911///
912/// # Explicit tiers
913///
914/// Specify which tiers to generate:
915///
916/// ```rust,ignore
917/// #[magetypes(v1, v3, neon)]
918/// fn process(token: Token, data: &[f32]) -> f32 {
919/// inner_simd_work(token, data)
920/// }
921/// // Generates: process_v1, process_v3, process_neon, process_scalar
922/// ```
923///
924/// `scalar` is always included implicitly.
925///
926/// Known tiers: `v1`, `v2`, `v3`, `v4`, `v4x`, `neon`, `neon_aes`,
927/// `neon_sha3`, `neon_crc`, `wasm128`, `scalar`.
928///
929/// # What gets replaced
930///
931/// **Only `Token`** is replaced — with the concrete token type for each variant
932/// (e.g., `archmage::X64V3Token`, `archmage::ScalarToken`). SIMD types like
933/// `f32x8` and constants like `LANES` are **not** replaced by this macro.
934///
935/// # Usage with incant!
936///
937/// The generated variants work with `incant!` for dispatch:
938///
939/// ```rust,ignore
940/// pub fn process_api(data: &[f32]) -> f32 {
941/// incant!(process(data))
942/// }
943///
944/// // Or with matching explicit tiers:
945/// pub fn process_api(data: &[f32]) -> f32 {
946/// incant!(process(data), [v1, v3, neon])
947/// }
948/// ```
949#[proc_macro_attribute]
950pub fn magetypes(attr: TokenStream, item: TokenStream) -> TokenStream {
951 let input_fn = parse_macro_input!(item as ItemFn);
952
953 // Parse optional tier list from attribute args
954 let tier_names: Vec<String> = if attr.is_empty() {
955 DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect()
956 } else {
957 let parser = |input: ParseStream| input.parse_terminated(Ident::parse, Token![,]);
958 let idents = match syn::parse::Parser::parse(parser, attr) {
959 Ok(p) => p,
960 Err(e) => return e.to_compile_error().into(),
961 };
962 idents.iter().map(|i| i.to_string()).collect()
963 };
964
965 let tiers = match resolve_tiers(&tier_names, input_fn.sig.ident.span()) {
966 Ok(t) => t,
967 Err(e) => return e.to_compile_error().into(),
968 };
969
970 magetypes_impl(input_fn, &tiers)
971}
972
973fn magetypes_impl(mut input_fn: ItemFn, tiers: &[&TierDescriptor]) -> TokenStream {
974 // Strip user-provided #[arcane] / #[rite] to prevent double-wrapping
975 // (magetypes auto-adds #[arcane] on non-scalar variants)
976 input_fn
977 .attrs
978 .retain(|attr| !attr.path().is_ident("arcane") && !attr.path().is_ident("rite"));
979
980 let fn_name = &input_fn.sig.ident;
981 let fn_attrs = &input_fn.attrs;
982
983 // Convert function to string for text substitution
984 let fn_str = input_fn.to_token_stream().to_string();
985
986 let mut variants = Vec::new();
987
988 for tier in tiers {
989 // Create suffixed function name
990 let suffixed_name = format!("{}_{}", fn_name, tier.suffix);
991
992 // Do text substitution
993 let mut variant_str = fn_str.clone();
994
995 // Replace function name
996 variant_str = variant_str.replacen(&fn_name.to_string(), &suffixed_name, 1);
997
998 // Replace Token type with concrete token
999 variant_str = variant_str.replace("Token", tier.token_path);
1000
1001 // Parse back to tokens
1002 let variant_tokens: proc_macro2::TokenStream = match variant_str.parse() {
1003 Ok(t) => t,
1004 Err(e) => {
1005 return syn::Error::new_spanned(
1006 &input_fn,
1007 format!(
1008 "Failed to parse generated variant `{}`: {}",
1009 suffixed_name, e
1010 ),
1011 )
1012 .to_compile_error()
1013 .into();
1014 }
1015 };
1016
1017 // Add cfg guards
1018 let cfg_guard = match (tier.target_arch, tier.cargo_feature) {
1019 (Some(arch), Some(feature)) => {
1020 quote! { #[cfg(all(target_arch = #arch, feature = #feature))] }
1021 }
1022 (Some(arch), None) => {
1023 quote! { #[cfg(target_arch = #arch)] }
1024 }
1025 (None, Some(feature)) => {
1026 quote! { #[cfg(feature = #feature)] }
1027 }
1028 (None, None) => {
1029 quote! {} // No guard needed (scalar)
1030 }
1031 };
1032
1033 variants.push(if tier.name != "scalar" {
1034 // Non-scalar variants get #[arcane] so target_feature is applied
1035 quote! {
1036 #cfg_guard
1037 #[archmage::arcane]
1038 #variant_tokens
1039 }
1040 } else {
1041 quote! {
1042 #cfg_guard
1043 #variant_tokens
1044 }
1045 });
1046 }
1047
1048 // Remove attributes from the list that should not be duplicated
1049 let filtered_attrs: Vec<_> = fn_attrs
1050 .iter()
1051 .filter(|a| !a.path().is_ident("magetypes"))
1052 .collect();
1053
1054 let output = quote! {
1055 #(#filtered_attrs)*
1056 #(#variants)*
1057 };
1058
1059 output.into()
1060}
1061
1062// =============================================================================
1063// incant! macro - dispatch to platform-specific variants
1064// =============================================================================
1065
1066// =============================================================================
1067// Tier descriptors for incant! and #[magetypes]
1068// =============================================================================
1069
/// Describes a dispatch tier for incant! and #[magetypes].
///
/// All instances live in the static [`ALL_TIERS`] table; user-facing tier
/// names are resolved against it via `find_tier`/`resolve_tiers`.
struct TierDescriptor {
    /// Tier name as written in user code (e.g., "v3", "neon")
    name: &'static str,
    /// Function suffix (e.g., "v3", "neon", "scalar")
    suffix: &'static str,
    /// Token type path (e.g., "archmage::X64V3Token")
    token_path: &'static str,
    /// IntoConcreteToken method name (e.g., "as_x64v3")
    as_method: &'static str,
    /// Target architecture for cfg guard (None = no guard)
    target_arch: Option<&'static str>,
    /// Required cargo feature (None = no feature guard)
    cargo_feature: Option<&'static str>,
    /// Dispatch priority (higher = tried first within same arch)
    priority: u32,
}
1087
/// All known tiers in dispatch-priority order (highest first within arch).
///
/// `resolve_tiers` stable-sorts by `priority`, so tiers that share a
/// priority (the three NEON extension tiers at 30) keep the order the user
/// wrote them in.
const ALL_TIERS: &[TierDescriptor] = &[
    // x86: highest to lowest
    TierDescriptor {
        name: "v4x",
        suffix: "v4x",
        token_path: "archmage::X64V4xToken",
        as_method: "as_x64v4x",
        target_arch: Some("x86_64"),
        cargo_feature: Some("avx512"), // AVX-512 tiers are opt-in via cargo feature
        priority: 50,
    },
    TierDescriptor {
        name: "v4",
        suffix: "v4",
        token_path: "archmage::X64V4Token",
        as_method: "as_x64v4",
        target_arch: Some("x86_64"),
        cargo_feature: Some("avx512"),
        priority: 40,
    },
    TierDescriptor {
        name: "v3",
        suffix: "v3",
        token_path: "archmage::X64V3Token",
        as_method: "as_x64v3",
        target_arch: Some("x86_64"),
        cargo_feature: None,
        priority: 30,
    },
    TierDescriptor {
        name: "v2",
        suffix: "v2",
        token_path: "archmage::X64V2Token",
        as_method: "as_x64v2",
        target_arch: Some("x86_64"),
        cargo_feature: None,
        priority: 20,
    },
    TierDescriptor {
        name: "v1",
        suffix: "v1",
        token_path: "archmage::X64V1Token",
        as_method: "as_x64v1",
        target_arch: Some("x86_64"),
        cargo_feature: None,
        priority: 10,
    },
    // ARM: highest to lowest
    TierDescriptor {
        name: "arm_v3",
        suffix: "arm_v3",
        token_path: "archmage::Arm64V3Token",
        as_method: "as_arm_v3",
        target_arch: Some("aarch64"),
        cargo_feature: None,
        priority: 50,
    },
    TierDescriptor {
        name: "arm_v2",
        suffix: "arm_v2",
        token_path: "archmage::Arm64V2Token",
        as_method: "as_arm_v2",
        target_arch: Some("aarch64"),
        cargo_feature: None,
        priority: 40,
    },
    // The three NEON extension tiers share priority 30; stable sort keeps
    // the order the user listed them in.
    TierDescriptor {
        name: "neon_aes",
        suffix: "neon_aes",
        token_path: "archmage::NeonAesToken",
        as_method: "as_neon_aes",
        target_arch: Some("aarch64"),
        cargo_feature: None,
        priority: 30,
    },
    TierDescriptor {
        name: "neon_sha3",
        suffix: "neon_sha3",
        token_path: "archmage::NeonSha3Token",
        as_method: "as_neon_sha3",
        target_arch: Some("aarch64"),
        cargo_feature: None,
        priority: 30,
    },
    TierDescriptor {
        name: "neon_crc",
        suffix: "neon_crc",
        token_path: "archmage::NeonCrcToken",
        as_method: "as_neon_crc",
        target_arch: Some("aarch64"),
        cargo_feature: None,
        priority: 30,
    },
    TierDescriptor {
        name: "neon",
        suffix: "neon",
        token_path: "archmage::NeonToken",
        as_method: "as_neon",
        target_arch: Some("aarch64"),
        cargo_feature: None,
        priority: 20,
    },
    // WASM
    TierDescriptor {
        name: "wasm128",
        suffix: "wasm128",
        token_path: "archmage::Wasm128Token",
        as_method: "as_wasm128",
        target_arch: Some("wasm32"),
        cargo_feature: None,
        priority: 20,
    },
    // Scalar (always last): no arch guard, no feature guard.
    TierDescriptor {
        name: "scalar",
        suffix: "scalar",
        token_path: "archmage::ScalarToken",
        as_method: "as_scalar",
        target_arch: None,
        cargo_feature: None,
        priority: 0,
    },
];
1212
/// Default tiers (backwards-compatible with pre-explicit behavior).
///
/// Note: `v4` carries the `avx512` cargo-feature guard (see [`ALL_TIERS`]),
/// so it only takes effect when that feature is enabled; `scalar` is the
/// universal fallback appended by `resolve_tiers` in any case.
const DEFAULT_TIER_NAMES: &[&str] = &["v4", "v3", "neon", "wasm128", "scalar"];
1215
1216/// Look up a tier by name, returning an error on unknown names.
1217fn find_tier(name: &str) -> Option<&'static TierDescriptor> {
1218 ALL_TIERS.iter().find(|t| t.name == name)
1219}
1220
1221/// Resolve tier names to descriptors, sorted by dispatch priority (highest first).
1222/// Always appends "scalar" if not already present.
1223fn resolve_tiers(
1224 tier_names: &[String],
1225 error_span: proc_macro2::Span,
1226) -> syn::Result<Vec<&'static TierDescriptor>> {
1227 let mut tiers = Vec::new();
1228 for name in tier_names {
1229 match find_tier(name) {
1230 Some(tier) => tiers.push(tier),
1231 None => {
1232 let known: Vec<&str> = ALL_TIERS.iter().map(|t| t.name).collect();
1233 return Err(syn::Error::new(
1234 error_span,
1235 format!("unknown tier `{}`. Known tiers: {}", name, known.join(", ")),
1236 ));
1237 }
1238 }
1239 }
1240
1241 // Always include scalar fallback
1242 if !tiers.iter().any(|t| t.name == "scalar") {
1243 tiers.push(find_tier("scalar").unwrap());
1244 }
1245
1246 // Sort by priority (highest first) for correct dispatch order
1247 tiers.sort_by(|a, b| b.priority.cmp(&a.priority));
1248
1249 Ok(tiers)
1250}
1251
1252// =============================================================================
1253// incant! macro - dispatch to platform-specific variants
1254// =============================================================================
1255
/// Input for the incant! macro.
///
/// Grammar: `func_name(arg1, arg2, ...) [with token_expr] [, [tier1, ...]]`
struct IncantInput {
    /// Function name to call (variants are derived by suffixing it)
    func_name: Ident,
    /// Arguments to pass
    args: Vec<syn::Expr>,
    /// Optional token variable for passthrough mode (`with token`)
    with_token: Option<syn::Expr>,
    /// Optional explicit tier list (None = default tiers); the span covers
    /// the bracketed list so tier errors point at it
    tiers: Option<(Vec<String>, proc_macro2::Span)>,
}
1267
1268impl Parse for IncantInput {
1269 fn parse(input: ParseStream) -> syn::Result<Self> {
1270 // Parse: function_name(arg1, arg2, ...) [with token_expr] [, [tier1, tier2, ...]]
1271 let func_name: Ident = input.parse()?;
1272
1273 // Parse parenthesized arguments
1274 let content;
1275 syn::parenthesized!(content in input);
1276 let args = content
1277 .parse_terminated(syn::Expr::parse, Token![,])?
1278 .into_iter()
1279 .collect();
1280
1281 // Check for optional "with token"
1282 let with_token = if input.peek(Ident) {
1283 let kw: Ident = input.parse()?;
1284 if kw != "with" {
1285 return Err(syn::Error::new_spanned(kw, "expected `with` keyword"));
1286 }
1287 Some(input.parse()?)
1288 } else {
1289 None
1290 };
1291
1292 // Check for optional tier list: , [tier1, tier2, ...]
1293 let tiers = if input.peek(Token![,]) {
1294 let _: Token![,] = input.parse()?;
1295 let bracket_content;
1296 let bracket = syn::bracketed!(bracket_content in input);
1297 let tier_idents = bracket_content.parse_terminated(Ident::parse, Token![,])?;
1298 let tier_names: Vec<String> = tier_idents.iter().map(|i| i.to_string()).collect();
1299 Some((tier_names, bracket.span.join()))
1300 } else {
1301 None
1302 };
1303
1304 Ok(IncantInput {
1305 func_name,
1306 args,
1307 with_token,
1308 tiers,
1309 })
1310 }
1311}
1312
1313/// Dispatch to platform-specific SIMD variants.
1314///
1315/// # Entry Point Mode (no token yet)
1316///
1317/// Summons tokens and dispatches to the best available variant:
1318///
1319/// ```rust,ignore
1320/// pub fn public_api(data: &[f32]) -> f32 {
1321/// incant!(dot(data))
1322/// }
1323/// ```
1324///
1325/// Expands to runtime feature detection + dispatch to `dot_v3`, `dot_v4`,
1326/// `dot_neon`, `dot_wasm128`, or `dot_scalar`.
1327///
1328/// # Explicit Tiers
1329///
1330/// Specify which tiers to dispatch to:
1331///
1332/// ```rust,ignore
1333/// // Only dispatch to v1, v3, neon, and scalar
1334/// pub fn api(data: &[f32]) -> f32 {
1335/// incant!(process(data), [v1, v3, neon])
1336/// }
1337/// ```
1338///
1339/// `scalar` is always included implicitly. Unknown tier names cause a
1340/// compile error. Tiers are automatically sorted into correct dispatch
1341/// order (highest priority first).
1342///
/// Known tiers: `v1`, `v2`, `v3`, `v4`, `v4x`, `arm_v2`, `arm_v3`, `neon`,
/// `neon_aes`, `neon_sha3`, `neon_crc`, `wasm128`, `scalar`.
1345///
1346/// # Passthrough Mode (already have token)
1347///
1348/// Uses compile-time dispatch via `IntoConcreteToken`:
1349///
1350/// ```rust,ignore
1351/// #[arcane]
1352/// fn outer(token: X64V3Token, data: &[f32]) -> f32 {
1353/// incant!(inner(data) with token)
1354/// }
1355/// ```
1356///
1357/// Also supports explicit tiers:
1358///
1359/// ```rust,ignore
1360/// fn inner<T: IntoConcreteToken>(token: T, data: &[f32]) -> f32 {
1361/// incant!(process(data) with token, [v3, neon])
1362/// }
1363/// ```
1364///
1365/// The compiler monomorphizes the dispatch, eliminating non-matching branches.
1366///
1367/// # Variant Naming
1368///
1369/// Functions must have suffixed variants matching the selected tiers:
1370/// - `_v1` for `X64V1Token`
1371/// - `_v2` for `X64V2Token`
1372/// - `_v3` for `X64V3Token`
1373/// - `_v4` for `X64V4Token` (requires `avx512` feature)
1374/// - `_v4x` for `X64V4xToken` (requires `avx512` feature)
1375/// - `_neon` for `NeonToken`
1376/// - `_neon_aes` for `NeonAesToken`
1377/// - `_neon_sha3` for `NeonSha3Token`
/// - `_neon_crc` for `NeonCrcToken`
/// - `_arm_v2` for `Arm64V2Token`
/// - `_arm_v3` for `Arm64V3Token`
/// - `_wasm128` for `Wasm128Token`
/// - `_scalar` for `ScalarToken`
1381#[proc_macro]
1382pub fn incant(input: TokenStream) -> TokenStream {
1383 let input = parse_macro_input!(input as IncantInput);
1384 incant_impl(input)
1385}
1386
1387/// Legacy alias for [`incant!`].
1388#[proc_macro]
1389pub fn simd_route(input: TokenStream) -> TokenStream {
1390 let input = parse_macro_input!(input as IncantInput);
1391 incant_impl(input)
1392}
1393
1394fn incant_impl(input: IncantInput) -> TokenStream {
1395 let func_name = &input.func_name;
1396 let args = &input.args;
1397
1398 // Resolve tiers
1399 let tier_names: Vec<String> = match &input.tiers {
1400 Some((names, _)) => names.clone(),
1401 None => DEFAULT_TIER_NAMES.iter().map(|s| s.to_string()).collect(),
1402 };
1403 let error_span = input
1404 .tiers
1405 .as_ref()
1406 .map(|(_, span)| *span)
1407 .unwrap_or_else(|| func_name.span());
1408
1409 let tiers = match resolve_tiers(&tier_names, error_span) {
1410 Ok(t) => t,
1411 Err(e) => return e.to_compile_error().into(),
1412 };
1413
1414 // Group tiers by architecture for cfg-guarded blocks
1415 // Within each arch, tiers are already sorted by priority (highest first)
1416 if let Some(token_expr) = &input.with_token {
1417 gen_incant_passthrough(func_name, args, token_expr, &tiers)
1418 } else {
1419 gen_incant_entry(func_name, args, &tiers)
1420 }
1421}
1422
1423/// Generate incant! passthrough mode (already have a token).
1424fn gen_incant_passthrough(
1425 func_name: &Ident,
1426 args: &[syn::Expr],
1427 token_expr: &syn::Expr,
1428 tiers: &[&TierDescriptor],
1429) -> TokenStream {
1430 let mut dispatch_arms = Vec::new();
1431
1432 // Group non-scalar tiers by (target_arch, cargo_feature) for nested cfg blocks
1433 let mut arch_groups: Vec<(Option<&str>, Option<&str>, Vec<&TierDescriptor>)> = Vec::new();
1434 for tier in tiers {
1435 if tier.name == "scalar" {
1436 continue; // Handle scalar separately at the end
1437 }
1438 let key = (tier.target_arch, tier.cargo_feature);
1439 if let Some(group) = arch_groups.iter_mut().find(|(a, f, _)| (*a, *f) == key) {
1440 group.2.push(tier);
1441 } else {
1442 arch_groups.push((tier.target_arch, tier.cargo_feature, vec![tier]));
1443 }
1444 }
1445
1446 for (target_arch, cargo_feature, group_tiers) in &arch_groups {
1447 let mut tier_checks = Vec::new();
1448 for tier in group_tiers {
1449 let fn_suffixed = format_ident!("{}_{}", func_name, tier.suffix);
1450 let as_method = format_ident!("{}", tier.as_method);
1451 tier_checks.push(quote! {
1452 if let Some(__t) = __incant_token.#as_method() {
1453 break '__incant #fn_suffixed(__t, #(#args),*);
1454 }
1455 });
1456 }
1457
1458 let inner = quote! { #(#tier_checks)* };
1459
1460 let guarded = match (target_arch, cargo_feature) {
1461 (Some(arch), Some(feat)) => quote! {
1462 #[cfg(target_arch = #arch)]
1463 {
1464 #[cfg(feature = #feat)]
1465 { #inner }
1466 }
1467 },
1468 (Some(arch), None) => quote! {
1469 #[cfg(target_arch = #arch)]
1470 { #inner }
1471 },
1472 (None, Some(feat)) => quote! {
1473 #[cfg(feature = #feat)]
1474 { #inner }
1475 },
1476 (None, None) => inner,
1477 };
1478
1479 dispatch_arms.push(guarded);
1480 }
1481
1482 // Scalar fallback (always last)
1483 let fn_scalar = format_ident!("{}_scalar", func_name);
1484 let scalar_arm = if tiers.iter().any(|t| t.name == "scalar") {
1485 quote! {
1486 if let Some(__t) = __incant_token.as_scalar() {
1487 break '__incant #fn_scalar(__t, #(#args),*);
1488 }
1489 unreachable!("Token did not match any known variant")
1490 }
1491 } else {
1492 quote! { unreachable!("Token did not match any known variant") }
1493 };
1494
1495 let expanded = quote! {
1496 '__incant: {
1497 use archmage::IntoConcreteToken;
1498 let __incant_token = #token_expr;
1499 #(#dispatch_arms)*
1500 #scalar_arm
1501 }
1502 };
1503 expanded.into()
1504}
1505
1506/// Generate incant! entry point mode (summon tokens).
1507fn gen_incant_entry(
1508 func_name: &Ident,
1509 args: &[syn::Expr],
1510 tiers: &[&TierDescriptor],
1511) -> TokenStream {
1512 let mut dispatch_arms = Vec::new();
1513
1514 // Group non-scalar tiers by target_arch for cfg blocks.
1515 // Within each arch group, further split by cargo_feature.
1516 let mut arch_groups: Vec<(Option<&str>, Vec<&TierDescriptor>)> = Vec::new();
1517 for tier in tiers {
1518 if tier.name == "scalar" {
1519 continue;
1520 }
1521 if let Some(group) = arch_groups.iter_mut().find(|(a, _)| *a == tier.target_arch) {
1522 group.1.push(tier);
1523 } else {
1524 arch_groups.push((tier.target_arch, vec![tier]));
1525 }
1526 }
1527
1528 for (target_arch, group_tiers) in &arch_groups {
1529 let mut tier_checks = Vec::new();
1530 for tier in group_tiers {
1531 let fn_suffixed = format_ident!("{}_{}", func_name, tier.suffix);
1532 let token_path: syn::Path = syn::parse_str(tier.token_path).unwrap();
1533
1534 let check = quote! {
1535 if let Some(__t) = #token_path::summon() {
1536 break '__incant #fn_suffixed(__t, #(#args),*);
1537 }
1538 };
1539
1540 if let Some(feat) = tier.cargo_feature {
1541 tier_checks.push(quote! {
1542 #[cfg(feature = #feat)]
1543 { #check }
1544 });
1545 } else {
1546 tier_checks.push(check);
1547 }
1548 }
1549
1550 let inner = quote! { #(#tier_checks)* };
1551
1552 if let Some(arch) = target_arch {
1553 dispatch_arms.push(quote! {
1554 #[cfg(target_arch = #arch)]
1555 { #inner }
1556 });
1557 } else {
1558 dispatch_arms.push(inner);
1559 }
1560 }
1561
1562 // Scalar fallback
1563 let fn_scalar = format_ident!("{}_scalar", func_name);
1564
1565 let expanded = quote! {
1566 '__incant: {
1567 use archmage::SimdToken;
1568 #(#dispatch_arms)*
1569 #fn_scalar(archmage::ScalarToken, #(#args),*)
1570 }
1571 };
1572 expanded.into()
1573}
1574
1575// =============================================================================
1576// Unit tests for token/trait recognition maps
1577// =============================================================================
1578
#[cfg(test)]
mod tests {
    use super::*;

    use super::generated::{ALL_CONCRETE_TOKENS, ALL_TRAIT_NAMES};

    // These tests guard the proc-macro's name->feature registries against
    // drifting out of sync with the runtime crate's generated token/trait
    // lists, and pin down which names must NOT be recognized.

    #[test]
    fn every_concrete_token_is_in_token_to_features() {
        for &name in ALL_CONCRETE_TOKENS {
            assert!(
                token_to_features(name).is_some(),
                "Token `{}` exists in runtime crate but is NOT recognized by \
                 token_to_features() in the proc macro. Add it!",
                name
            );
        }
    }

    #[test]
    fn every_trait_is_in_trait_to_features() {
        for &name in ALL_TRAIT_NAMES {
            assert!(
                trait_to_features(name).is_some(),
                "Trait `{}` exists in runtime crate but is NOT recognized by \
                 trait_to_features() in the proc macro. Add it!",
                name
            );
        }
    }

    #[test]
    fn token_aliases_map_to_same_features() {
        // Desktop64 = X64V3Token
        assert_eq!(
            token_to_features("Desktop64"),
            token_to_features("X64V3Token"),
            "Desktop64 and X64V3Token should map to identical features"
        );

        // Server64 = X64V4Token = Avx512Token
        assert_eq!(
            token_to_features("Server64"),
            token_to_features("X64V4Token"),
            "Server64 and X64V4Token should map to identical features"
        );
        assert_eq!(
            token_to_features("X64V4Token"),
            token_to_features("Avx512Token"),
            "X64V4Token and Avx512Token should map to identical features"
        );

        // Arm64 = NeonToken
        assert_eq!(
            token_to_features("Arm64"),
            token_to_features("NeonToken"),
            "Arm64 and NeonToken should map to identical features"
        );
    }

    #[test]
    fn trait_to_features_includes_tokens_as_bounds() {
        // Tier tokens should also work as trait bounds
        // (for `impl X64V3Token` patterns, even though Rust won't allow it,
        // the macro processes AST before type checking)
        let tier_tokens = [
            "X64V2Token",
            "X64V3Token",
            "Desktop64",
            "Avx2FmaToken",
            "X64V4Token",
            "Avx512Token",
            "Server64",
            "X64V4xToken",
            "Avx512Fp16Token",
            "NeonToken",
            "Arm64",
            "NeonAesToken",
            "NeonSha3Token",
            "NeonCrcToken",
            "Arm64V2Token",
            "Arm64V3Token",
        ];

        for &name in &tier_tokens {
            assert!(
                trait_to_features(name).is_some(),
                "Tier token `{}` should also be recognized in trait_to_features() \
                 for use as a generic bound. Add it!",
                name
            );
        }
    }

    #[test]
    fn trait_features_are_cumulative() {
        // HasX64V4 should include all HasX64V2 features plus more
        let v2_features = trait_to_features("HasX64V2").unwrap();
        let v4_features = trait_to_features("HasX64V4").unwrap();

        for &f in v2_features {
            assert!(
                v4_features.contains(&f),
                "HasX64V4 should include v2 feature `{}` but doesn't",
                f
            );
        }

        // v4 should have more features than v2
        assert!(
            v4_features.len() > v2_features.len(),
            "HasX64V4 should have more features than HasX64V2"
        );
    }

    #[test]
    fn x64v3_trait_features_include_v2() {
        // X64V3Token as trait bound should include v2 features
        let v2 = trait_to_features("HasX64V2").unwrap();
        let v3 = trait_to_features("X64V3Token").unwrap();

        for &f in v2 {
            assert!(
                v3.contains(&f),
                "X64V3Token trait features should include v2 feature `{}` but don't",
                f
            );
        }
    }

    #[test]
    fn has_neon_aes_includes_neon() {
        let neon = trait_to_features("HasNeon").unwrap();
        let neon_aes = trait_to_features("HasNeonAes").unwrap();

        for &f in neon {
            assert!(
                neon_aes.contains(&f),
                "HasNeonAes should include NEON feature `{}`",
                f
            );
        }
    }

    #[test]
    fn no_removed_traits_are_recognized() {
        // These traits were removed in 0.3.0 and should NOT be recognized
        let removed = [
            "HasSse",
            "HasSse2",
            "HasSse41",
            "HasSse42",
            "HasAvx",
            "HasAvx2",
            "HasFma",
            "HasAvx512f",
            "HasAvx512bw",
            "HasAvx512vl",
            "HasAvx512vbmi2",
            "HasSve",
            "HasSve2",
        ];

        for &name in &removed {
            assert!(
                trait_to_features(name).is_none(),
                "Removed trait `{}` should NOT be in trait_to_features(). \
                 It was removed in 0.3.0 — users should migrate to tier traits.",
                name
            );
        }
    }

    #[test]
    fn no_nonexistent_tokens_are_recognized() {
        // These tokens don't exist and should NOT be recognized
        let fake = [
            "SveToken",
            "Sve2Token",
            "Avx512VnniToken",
            "X64V4ModernToken",
            "NeonFp16Token",
        ];

        for &name in &fake {
            assert!(
                token_to_features(name).is_none(),
                "Non-existent token `{}` should NOT be in token_to_features()",
                name
            );
        }
    }

    #[test]
    fn featureless_traits_are_not_in_registries() {
        // SimdToken and IntoConcreteToken should NOT be in any feature registry
        // because they don't map to CPU features
        for &name in FEATURELESS_TRAIT_NAMES {
            assert!(
                token_to_features(name).is_none(),
                "`{}` should NOT be in token_to_features() — it has no CPU features",
                name
            );
            assert!(
                trait_to_features(name).is_none(),
                "`{}` should NOT be in trait_to_features() — it has no CPU features",
                name
            );
        }
    }

    #[test]
    fn find_featureless_trait_detects_simdtoken() {
        let names = vec!["SimdToken".to_string()];
        assert_eq!(find_featureless_trait(&names), Some("SimdToken"));

        let names = vec!["IntoConcreteToken".to_string()];
        assert_eq!(find_featureless_trait(&names), Some("IntoConcreteToken"));

        // Feature-bearing traits should NOT be detected
        let names = vec!["HasX64V2".to_string()];
        assert_eq!(find_featureless_trait(&names), None);

        let names = vec!["HasNeon".to_string()];
        assert_eq!(find_featureless_trait(&names), None);

        // Mixed: if SimdToken is among real traits, still detected
        let names = vec!["SimdToken".to_string(), "HasX64V2".to_string()];
        assert_eq!(find_featureless_trait(&names), Some("SimdToken"));
    }

    #[test]
    fn arm64_v2_v3_traits_are_cumulative() {
        let v2_features = trait_to_features("HasArm64V2").unwrap();
        let v3_features = trait_to_features("HasArm64V3").unwrap();

        for &f in v2_features {
            assert!(
                v3_features.contains(&f),
                "HasArm64V3 should include v2 feature `{}` but doesn't",
                f
            );
        }

        assert!(
            v3_features.len() > v2_features.len(),
            "HasArm64V3 should have more features than HasArm64V2"
        );
    }
}