// archmage/lib.rs
1//! # archmage
2//!
3//! > Safely invoke your intrinsic power, using the tokens granted to you by the CPU.
4//! > Cast primitive magics faster than any mage alive.
5//!
6//! archmage provides capability tokens that prove CPU feature availability at runtime,
7//! making raw SIMD intrinsics safe to call via the `#[arcane]` macro.
8//!
9//! ## Quick Example
10//!
11//! ```rust,ignore
12//! use archmage::{X64V3Token, SimdToken, arcane};
13//!
14//! #[arcane(import_intrinsics)]
15//! fn multiply_add(_token: X64V3Token, a: &[f32; 8], b: &[f32; 8]) -> [f32; 8] {
16//! // import_intrinsics brings all intrinsics + safe memory ops into scope
17//! let va = _mm256_loadu_ps(a); // Takes &[f32; 8], not *const f32
18//! let vb = _mm256_loadu_ps(b);
19//!
20//! // Value-based intrinsics are SAFE inside #[arcane]! (Rust 1.85+)
21//! let result = _mm256_fmadd_ps(va, vb, va);
22//!
23//! let mut out = [0.0f32; 8];
24//! _mm256_storeu_ps(&mut out, result);
25//! out
26//! }
27//!
28//! fn main() {
29//! // X64V3Token: AVX2 + FMA + BMI2 (Haswell 2013+, Zen 1+)
30//! // CPUID check elided if compiled with -C target-cpu=native
31//! if let Some(token) = X64V3Token::summon() {
32//! let result = multiply_add(token, &[1.0; 8], &[2.0; 8]);
33//! }
34//! }
35//! ```
36//!
37//! ## Auto-Imports
38//!
39//! `import_intrinsics` is the recommended default — it injects
40//! `archmage::intrinsics::{arch}::*` into the function body, giving you all
41//! platform types, value intrinsics, and safe memory ops in one import:
42//!
43//! ```rust,ignore
44//! use archmage::{X64V3Token, SimdToken, arcane};
45//!
46//! #[arcane(import_intrinsics)]
47//! fn load(_token: X64V3Token, data: &[f32; 8]) -> __m256 {
48//! _mm256_loadu_ps(data) // Safe! Takes &[f32; 8], not *const f32.
49//! }
50//! ```
51//!
52//! The prelude (`use archmage::prelude::*`) is still available for module-level imports.
53//! See the [`prelude`] module for full documentation.
54//!
55//! ## How It Works
56//!
57//! **Capability Tokens** are zero-sized proof types created via `summon()`, which
58//! checks CPUID at runtime (elided if compiled with target features enabled).
59//! See [`token-registry.toml`](https://github.com/imazen/archmage/blob/main/token-registry.toml)
60//! for the complete mapping of tokens to CPU features.
61//!
62//! **The `#[arcane]` and `#[rite]` macros** determine which `#[target_feature]`
63//! attributes to emit. `#[arcane]` reads the token type from the function
64//! signature. `#[rite]` works in three modes: token-based (reads the token
65//! parameter), tier-based (`#[rite(v3)]` — no token needed), or multi-tier
66//! (`#[rite(v3, v4, neon)]` — generates suffixed variants `fn_v3`, `fn_v4`,
67//! `fn_neon`).
68//!
69//! Descriptive aliases are available for AI-assisted coding:
70//! `#[token_target_features_boundary]` = `#[arcane]`,
71//! `#[token_target_features]` = `#[rite]`,
72//! `dispatch_variant!` = `incant!`.
73//!
74//! `#[arcane]` generates a sibling `#[target_feature]` function at the same
75//! scope, plus a safe wrapper that calls it. Since both live in the same scope,
76//! `self` and `Self` work naturally in methods. For trait impls, use
77//! `#[arcane(_self = Type)]` (nested mode). On wrong architectures, functions
78//! are cfg'd out by default. Use `incant!` for cross-arch dispatch.
79//!
80//! `#[rite]` applies `#[target_feature]` + `#[inline]` directly to the
81//! function, with no wrapper and no boundary. It works in three modes:
82//! - **Token-based** (`#[rite]`): reads the token from the function signature
83//! - **Tier-based** (`#[rite(v3)]`): specifies features via tier name, no token needed
84//! - **Multi-tier** (`#[rite(v3, v4, neon)]`): generates a suffixed copy for each tier
85//!
86//! **`#[rite]` should be your default.** Use `#[arcane]` only at entry points
87//! (the first call from non-SIMD code). Token-based and tier-based produce
88//! identical output — the token form can be easier to remember if you already
89//! have the token in scope. Multi-tier generates one function per tier, each
90//! compiled with different `#[target_feature]` attributes.
91//!
92//! Use concrete tokens like `X64V3Token` (AVX2+FMA) or `X64V4Token` (AVX-512).
93//! For generic code, use tier traits like `HasX64V2` or `HasX64V4`.
94//!
95//! ## Safety
96//!
97//! Since Rust 1.85, value-based SIMD intrinsics (arithmetic, shuffle, compare,
98//! bitwise) are safe inside `#[target_feature]` functions. Only pointer-based
99//! memory operations remain unsafe — `import_intrinsics` handles this by
100//! providing safe reference-based memory ops that shadow the pointer-based ones.
101//!
102//! Downstream crates can use `#![forbid(unsafe_code)]` when combining archmage
103//! tokens + `#[arcane]`/`#[rite]` macros + `import_intrinsics`.
104//!
105//! ## Feature Flags
106//!
107//! - `std` (default): Enable std library support
108//! - `avx512`: AVX-512 token support
109//!
110//! Macros (`#[arcane]`, `#[rite]`, `incant!`, etc.) are always available.
111
#![cfg_attr(not(feature = "std"), no_std)]
#![cfg_attr(docsrs, feature(doc_cfg))]
#![deny(unsafe_op_in_unsafe_fn)]
#![warn(missing_docs)]

// std is linked only when the `std` feature is enabled; otherwise the crate
// builds as `no_std` (see the cfg_attr above).
#[cfg(feature = "std")]
extern crate std;

// alloc is linked unconditionally, so allocation support is available even
// in no_std configurations.
extern crate alloc;

// Re-export proc-macros from archmage-macros. This includes the descriptive
// aliases documented above (`token_target_features_boundary` = `arcane`,
// `token_target_features` = `rite`, `dispatch_variant` = `incant`).
pub use archmage_macros::{
    arcane, autoversion, dispatch_variant, incant, magetypes, rite, simd_fn, simd_route,
    token_target_features, token_target_features_boundary,
};

// Optimized feature detection — only compiled on architectures where runtime
// detection is implemented (x86, x86_64, aarch64).
#[cfg(any(target_arch = "x86_64", target_arch = "x86", target_arch = "aarch64"))]
pub mod detect;

// Core token types and traits
pub mod tokens;

// Prelude: one import for tokens, traits, macros, and all intrinsics
pub mod prelude;

// Combined intrinsics namespace (core::arch + safe memory ops, safe wins)
pub mod intrinsics;

// Test utilities for exhaustive token permutation testing.
// Requires std; the doc(cfg) attribute surfaces that requirement on docs.rs.
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
pub mod testing;

// SIMD types moved to magetypes crate
// Use `magetypes::simd` for f32x8, i32x4, etc.

// ============================================================================
// Re-exports at crate root for convenience
// ============================================================================

// Core traits
pub use tokens::CompileTimeGuaranteedError;
pub use tokens::DisableAllSimdError;
pub use tokens::IntoConcreteToken;
pub use tokens::SimdToken;

// Global SIMD kill switch
pub use tokens::dangerously_disable_tokens_except_wasm;

// Width marker traits (deprecated — use concrete tokens or tier traits).
// allow(deprecated) keeps the backward-compat re-export warning-free.
#[allow(deprecated)]
pub use tokens::{Has128BitSimd, Has256BitSimd, Has512BitSimd};

// x86 tier marker traits (based on LLVM x86-64 microarchitecture levels)
pub use tokens::HasX64V2;
pub use tokens::HasX64V4;

// AArch64 tier marker traits
pub use tokens::{HasArm64V2, HasArm64V3, HasNeon, HasNeonAes, HasNeonSha3};

// All tokens available on all architectures (summon() returns None on wrong arch)
#[allow(deprecated)]
pub use tokens::{
    // ARM tokens
    Arm64,
    Arm64V2Token,
    Arm64V3Token,
    // x86 tier tokens (aliases still exported for backward compat)
    Avx2FmaToken,
    Desktop64,
    NeonAesToken,
    NeonCrcToken,
    NeonSha3Token,
    NeonToken,
    // Scalar fallback (always available)
    ScalarToken,
    Sse2Token,
    // WASM tokens
    Wasm128RelaxedToken,
    Wasm128Token,
    X64CryptoToken,
    X64V1Token,
    X64V2Token,
    X64V3CryptoToken,
    X64V3Token,
};

// AVX-512 tokens (always available; summon() returns None on unsupported CPUs)
pub use tokens::{Avx512Fp16Token, Avx512Token, Server64, X64V4Token, X64V4xToken};