#![allow(deprecated)]
use archmage::prelude::*;
/// Default `#[autoversion]` (no tier list). The macro is expected to emit a
/// token-less dispatcher named `sum_of_squares` plus per-tier variants — the
/// tests below call `sum_of_squares_scalar`, `sum_of_squares_v3` and
/// `sum_of_squares_v4` by name.
#[autoversion]
fn sum_of_squares(_token: SimdToken, data: &[f32]) -> f32 {
    // Plain reduction loop; each generated tier relies on auto-vectorization.
    let mut sum = 0.0f32;
    for &x in data {
        sum += x * x;
    }
    sum
}

/// The dispatcher (token argument stripped) must match a straightforward
/// iterator-based reference computation.
#[test]
fn dispatcher_returns_correct_result() {
    let data: Vec<f32> = (0..64).map(|i| i as f32).collect();
    let expected: f32 = data.iter().map(|x| x * x).sum();
    let result = sum_of_squares(&data);
    // Loose tolerance: a SIMD tier may reassociate the float summation.
    assert!(
        (result - expected).abs() < 1e-3,
        "dispatcher returned {result}, expected {expected}"
    );
}

/// The generated `_scalar` variant stays directly callable with `ScalarToken`.
#[test]
fn scalar_variant_works() {
    let data = [1.0f32, 2.0, 3.0, 4.0];
    let result = sum_of_squares_scalar(ScalarToken, &data);
    assert!((result - 30.0).abs() < 1e-6, "scalar: {result}");
}

/// The `_v3` variant runs only when the CPU grants an x86-64-v3 token.
#[cfg(target_arch = "x86_64")]
#[test]
fn v3_variant_works() {
    if let Some(token) = X64V3Token::summon() {
        let data = [1.0f32, 2.0, 3.0, 4.0];
        let result = sum_of_squares_v3(token, &data);
        assert!((result - 30.0).abs() < 1e-6, "v3: {result}");
    }
}

/// The `_v4` variant must exist and be callable even when the build has no
/// AVX-512 feature enabled; it is simply never summoned on older CPUs.
#[cfg(target_arch = "x86_64")]
#[test]
fn v4_variant_exists_without_avx512_feature() {
    if let Some(token) = X64V4Token::summon() {
        let data = [1.0f32, 2.0, 3.0, 4.0];
        let result = sum_of_squares_v4(token, &data);
        assert!((result - 30.0).abs() < 1e-6, "v4: {result}");
    }
}
/// Explicit tier list: only `v3` and `neon` variants are requested (plus the
/// scalar fallback the tests call as `dot_product_scalar`).
#[autoversion(v3, neon)]
fn dot_product(_token: SimdToken, a: &[f32], b: &[f32]) -> f32 {
    // Clamp to the shorter slice so mismatched lengths stay well-defined.
    let n = a.len().min(b.len());
    let mut sum = 0.0f32;
    for i in 0..n {
        sum += a[i] * b[i];
    }
    sum
}

/// Dispatcher generated from an explicit tier list works like the default one.
#[test]
fn explicit_tiers_dispatcher() {
    let a = [1.0f32, 2.0, 3.0, 4.0];
    let b = [4.0f32, 3.0, 2.0, 1.0];
    let result = dot_product(&a, &b);
    assert!((result - 20.0).abs() < 1e-6, "dot: {result}");
}

/// The scalar variant exists even though only v3/neon were listed.
#[test]
fn explicit_tiers_scalar_variant() {
    let a = [1.0f32, 2.0, 3.0, 4.0];
    let b = [4.0f32, 3.0, 2.0, 1.0];
    let result = dot_product_scalar(ScalarToken, &a, &b);
    assert!((result - 20.0).abs() < 1e-6, "dot scalar: {result}");
}
/// Multiple non-token parameters must be forwarded through the dispatcher.
#[autoversion]
fn scale_and_offset(_token: SimdToken, data: &[f32], scale: f32, offset: f32) -> Vec<f32> {
    data.iter().map(|&x| x * scale + offset).collect()
}

#[test]
fn multi_param_dispatcher() {
    let data = [1.0f32, 2.0, 3.0];
    let result = scale_and_offset(&data, 2.0, 10.0);
    assert_eq!(result, vec![12.0, 14.0, 16.0]);
}

/// `&mut` slice parameter: the dispatcher must support in-place mutation.
#[autoversion]
fn normalize_inplace(_token: SimdToken, data: &mut [f32], scale: f32) {
    for x in data.iter_mut() {
        *x *= scale;
    }
}

#[test]
fn mutable_slice_dispatcher() {
    let mut data = vec![1.0f32, 2.0, 3.0, 4.0];
    normalize_inplace(&mut data, 0.5);
    assert_eq!(data, vec![0.5, 1.0, 1.5, 2.0]);
}

/// Returning an owned, heap-allocated value through the dispatcher.
#[autoversion]
fn prefix_sums(_token: SimdToken, data: &[f32]) -> Vec<f32> {
    let mut result = Vec::with_capacity(data.len());
    let mut sum = 0.0f32;
    for &x in data {
        sum += x;
        result.push(sum);
    }
    result
}

#[test]
fn allocating_return_type() {
    let data = [1.0f32, 2.0, 3.0, 4.0];
    let result = prefix_sums(&data);
    assert_eq!(result, vec![1.0, 3.0, 6.0, 10.0]);
}
/// Test receiver: method dispatch via the `_self = Type` attribute form.
struct Buffer {
    data: Vec<f32>,
}

impl Buffer {
    /// `_self = Buffer` form: the body refers to `_self` instead of `self`,
    /// so the macro evidently rewrites the receiver into a `_self` parameter
    /// for the generated variants.
    #[autoversion(_self = Buffer)]
    fn total(&self, _token: SimdToken) -> f32 {
        _self.data.iter().sum()
    }
}

/// `&self` receiver: `buf.total()` dispatches with no explicit token.
#[test]
fn self_receiver_dispatcher() {
    let buf = Buffer {
        data: vec![1.0, 2.0, 3.0, 4.0],
    };
    let result = buf.total();
    assert!((result - 10.0).abs() < 1e-6, "total: {result}");
}

impl Buffer {
    /// `&mut self` receiver through the `_self` form, with an extra argument.
    #[autoversion(_self = Buffer)]
    fn scale_all(&mut self, _token: SimdToken, factor: f32) {
        for x in _self.data.iter_mut() {
            *x *= factor;
        }
    }
}

/// Contrast type: methods here use plain `self` (no `_self = …` attribute).
struct Counter {
    values: Vec<f32>,
}

impl Counter {
    #[autoversion]
    fn sum(&self, _token: SimdToken) -> f32 {
        self.values.iter().sum()
    }
    #[autoversion]
    fn double_all(&mut self, _token: SimdToken) {
        for v in self.values.iter_mut() {
            *v *= 2.0;
        }
    }
}

#[test]
fn mut_self_receiver_dispatcher() {
    let mut buf = Buffer {
        data: vec![1.0, 2.0, 3.0, 4.0],
    };
    buf.scale_all(3.0);
    assert_eq!(buf.data, vec![3.0, 6.0, 9.0, 12.0]);
}

#[test]
fn plain_self_receiver_ref() {
    let c = Counter {
        values: vec![1.0, 2.0, 3.0],
    };
    let result = c.sum();
    assert!((result - 6.0).abs() < 1e-6, "sum: {result}");
}

#[test]
fn plain_self_receiver_mut() {
    let mut c = Counter {
        values: vec![1.0, 2.0, 3.0],
    };
    c.double_all();
    assert_eq!(c.values, vec![2.0, 4.0, 6.0]);
}
/// Token parameter bound to `_` (wildcard): the macro must still recognize it
/// as the token slot and strip it from the dispatcher signature.
#[autoversion]
fn sum_wildcard(_: SimdToken, data: &[f32]) -> f32 {
    data.iter().sum()
}

#[test]
fn wildcard_token_param() {
    let data = [1.0f32, 2.0, 3.0];
    let result = sum_wildcard(&data);
    assert!((result - 6.0).abs() < 1e-6, "wildcard: {result}");
}

/// Scalar, v3 (when available) and the dispatched result must all agree
/// within float-reassociation tolerance.
#[test]
fn all_variants_consistent() {
    let data: Vec<f32> = (0..128).map(|i| (i as f32) * 0.1).collect();
    let expected = sum_of_squares_scalar(ScalarToken, &data);
    #[cfg(target_arch = "x86_64")]
    {
        if let Some(t) = X64V3Token::summon() {
            let v3 = sum_of_squares_v3(t, &data);
            assert!(
                (v3 - expected).abs() < 1e-1,
                "v3 ({v3}) != scalar ({expected})"
            );
        }
    }
    let dispatched = sum_of_squares(&data);
    assert!(
        (dispatched - expected).abs() < 1e-1,
        "dispatched ({dispatched}) != scalar ({expected})"
    );
}

/// Empty slices are valid input for both generated dispatchers.
#[test]
fn empty_input() {
    let empty: &[f32] = &[];
    assert_eq!(sum_of_squares(empty), 0.0);
    assert_eq!(dot_product(empty, empty), 0.0);
}
impl Counter {
    /// Plain `self` receiver combined with an explicit tier list.
    #[autoversion(v3, neon)]
    fn product(&self, _token: SimdToken) -> f32 {
        self.values.iter().product()
    }
}

#[test]
fn plain_self_with_explicit_tiers() {
    let c = Counter {
        values: vec![2.0, 3.0, 4.0],
    };
    assert!((c.product() - 24.0).abs() < 1e-6);
}

impl Counter {
    /// Plain `self` receiver plus an extra forwarded parameter.
    #[autoversion]
    fn weighted_sum(&self, _token: SimdToken, weight: f32) -> f32 {
        self.values.iter().map(|v| v * weight).sum()
    }
}

#[test]
fn plain_self_with_extra_params() {
    let c = Counter {
        values: vec![1.0, 2.0, 3.0],
    };
    assert!((c.weighted_sum(10.0) - 60.0).abs() < 1e-6);
}

/// By-value (`self`) receiver: the dispatcher must consume the value.
struct OwnedData {
    data: Vec<f32>,
}

impl OwnedData {
    #[autoversion]
    fn into_sum(self, _token: SimdToken) -> f32 {
        self.data.iter().sum()
    }
}

#[test]
fn owned_self_receiver() {
    let d = OwnedData {
        data: vec![1.0, 2.0, 3.0, 4.0],
    };
    assert!((d.into_sum() - 10.0).abs() < 1e-6);
}

/// Several wildcard (`_`) parameters besides the token: the macro must keep
/// their order and arity intact in the dispatcher.
#[autoversion]
fn add_wildcards(_: SimdToken, _: &[f32], _: &[f32]) -> f32 {
    42.0
}

#[test]
fn multiple_wildcards() {
    let a = [1.0f32];
    let b = [2.0f32];
    assert_eq!(add_wildcards(&a, &b), 42.0);
}
/// Tuple return type through the dispatcher.
#[autoversion]
fn min_max(_token: SimdToken, data: &[f32]) -> (f32, f32) {
    // Identity elements: empty input yields (INFINITY, NEG_INFINITY).
    let mut min = f32::INFINITY;
    let mut max = f32::NEG_INFINITY;
    for &x in data {
        if x < min {
            min = x;
        }
        if x > max {
            max = x;
        }
    }
    (min, max)
}

#[test]
fn tuple_return_type() {
    let data = [3.0f32, 1.0, 4.0, 1.0, 5.0, 9.0, 2.0, 6.0];
    let (min, max) = min_max(&data);
    assert!((min - 1.0).abs() < 1e-6);
    assert!((max - 9.0).abs() < 1e-6);
}

/// `Option` return type through the dispatcher.
#[autoversion]
fn find_first_negative(_token: SimdToken, data: &[f32]) -> Option<usize> {
    data.iter().position(|&x| x < 0.0)
}

#[test]
fn option_return_type() {
    assert_eq!(find_first_negative(&[1.0, -2.0, 3.0]), Some(1));
    assert_eq!(find_first_negative(&[1.0, 2.0, 3.0]), None);
}

/// Non-float element type (i64) through the dispatcher.
#[autoversion]
fn sum_i64(_token: SimdToken, data: &[i64]) -> i64 {
    data.iter().sum()
}

#[test]
fn integer_data() {
    let data: Vec<i64> = (1..=100).collect();
    assert_eq!(sum_i64(&data), 5050);
}

/// `bool` return type through the dispatcher.
#[autoversion]
fn all_positive(_token: SimdToken, data: &[f32]) -> bool {
    data.iter().all(|&x| x > 0.0)
}

#[test]
fn boolean_return() {
    assert!(all_positive(&[1.0, 2.0, 3.0]));
    assert!(!all_positive(&[1.0, -2.0, 3.0]));
}
/// Generated `_scalar` variants — free function and method alike — remain
/// directly callable with an explicit `ScalarToken`.
#[test]
fn scalar_variants_directly_callable() {
    let samples = [1.0f32, 2.0, 3.0, 4.0];
    let squares = sum_of_squares_scalar(ScalarToken, &samples);
    assert!((squares - 30.0).abs() < 1e-6);

    let counter = Counter {
        values: vec![1.0, 2.0, 3.0],
    };
    let total = counter.sum_scalar(ScalarToken);
    assert!((total - 6.0).abs() < 1e-6);
}

/// The generated `_v3` method variant is callable once the CPU grants a token.
#[cfg(target_arch = "x86_64")]
#[test]
fn v3_method_variant_directly_callable() {
    let token = match X64V3Token::summon() {
        Some(t) => t,
        None => return,
    };
    let counter = Counter {
        values: vec![10.0, 20.0, 30.0],
    };
    assert!((counter.sum_v3(token) - 60.0).abs() < 1e-6);
}
/// A larger reduction, intended to give the SIMD tiers real work.
#[autoversion]
fn sum_large(_token: SimdToken, data: &[f32]) -> f32 {
    let mut sum = 0.0f32;
    for &x in data {
        sum += x;
    }
    sum
}

#[test]
fn large_data_auto_vectorized() {
    let data: Vec<f32> = (0..4096).map(|i| i as f32).collect();
    let expected: f32 = (0..4096).map(|i| i as f32).sum();
    let result = sum_large(&data);
    // Tolerance of 1.0: f32 accumulation order differs between tiers.
    assert!(
        (result - expected).abs() < 1.0,
        "large sum: got {result}, expected {expected}"
    );
}

/// In-place mutation with two scalar bounds forwarded by the dispatcher.
#[autoversion]
fn clamp_inplace(_token: SimdToken, data: &mut [f32], lo: f32, hi: f32) {
    for x in data.iter_mut() {
        if *x < lo {
            *x = lo;
        }
        if *x > hi {
            *x = hi;
        }
    }
}

#[test]
fn inplace_mutation_with_bounds() {
    let mut data = vec![-5.0, 0.0, 5.0, 10.0, 15.0];
    clamp_inplace(&mut data, 0.0, 10.0);
    assert_eq!(data, vec![0.0, 0.0, 5.0, 10.0, 10.0]);
}
impl Counter {
    /// Return type borrows from `self`: the generated dispatcher must keep
    /// the lifetime relationship intact.
    #[autoversion]
    fn values_ref(&self, _token: SimdToken) -> &[f32] {
        &self.values
    }
}

#[test]
fn self_receiver_borrowing_return() {
    let c = Counter {
        values: vec![1.0, 2.0, 3.0],
    };
    assert_eq!(c.values_ref(), &[1.0, 2.0, 3.0]);
}

/// Unit return type and an entirely empty body are valid.
#[autoversion]
fn noop(_token: SimdToken, _data: &[f32]) {
}

#[test]
fn unit_return_type() {
    noop(&[1.0, 2.0, 3.0]);
}
/// Const-generic parameter on the function: the dispatcher must stay generic
/// over `N` (the tests rely on inference and on turbofish at call sites).
#[autoversion]
fn sum_array<const N: usize>(_token: SimdToken, data: &[f32; N]) -> f32 {
    let mut sum = 0.0f32;
    for &x in data {
        sum += x;
    }
    sum
}

#[test]
fn const_generic_basic() {
    let data = [1.0f32, 2.0, 3.0, 4.0];
    let result = sum_array(&data);
    assert!((result - 10.0).abs() < 1e-6, "const generic: {result}");
}

#[test]
fn const_generic_scalar_variant() {
    let data = [1.0f32, 2.0, 3.0];
    let result = sum_array_scalar(ScalarToken, &data);
    assert!(
        (result - 6.0).abs() < 1e-6,
        "const generic scalar: {result}"
    );
}

/// `N` appears only in the return type — inference must flow backwards.
#[autoversion]
fn make_zeros<const N: usize>(_token: SimdToken) -> [f32; N] {
    [0.0f32; N]
}

#[test]
fn const_generic_return_only() {
    let result: [f32; 4] = make_zeros();
    assert_eq!(result, [0.0; 4]);
}

/// Two independent const parameters (input and output lengths).
#[autoversion]
fn reshape<const M: usize, const N: usize>(_token: SimdToken, data: &[f32; M]) -> [f32; N] {
    let mut out = [0.0f32; N];
    let len = M.min(N);
    let mut i = 0;
    while i < len {
        out[i] = data[i];
        i += 1;
    }
    out
}

#[test]
fn const_generic_multiple() {
    let data = [1.0f32, 2.0, 3.0, 4.0];
    let result: [f32; 2] = reshape(&data);
    assert_eq!(result, [1.0, 2.0]);
}
/// Const-generic plus an ordinary type parameter with trait bounds: the
/// macro must carry the whole generic parameter list onto every variant.
#[autoversion]
fn sum_generic<const N: usize, T: Default + Copy + core::ops::Add<Output = T>>(
    _token: SimdToken,
    data: &[T; N],
) -> T {
    let mut acc = T::default();
    let mut i = 0;
    while i < N {
        acc = acc + data[i];
        i += 1;
    }
    acc
}

#[test]
fn const_generic_plus_type_generic() {
    let data = [1.0f32, 2.0, 3.0, 4.0];
    let result: f32 = sum_generic(&data);
    assert!((result - 10.0).abs() < 1e-6);
}

/// Const parameter used only inside the body (chunk width).
#[autoversion]
fn chunk_sum<const CHUNK: usize>(_token: SimdToken, data: &[f32]) -> f32 {
    let mut total = 0.0f32;
    for chunk in data.chunks(CHUNK) {
        for &x in chunk {
            total += x;
        }
    }
    total
}

/// `CHUNK` is not inferrable, so the dispatcher must accept a turbofish.
#[test]
fn const_generic_body_only() {
    let data = [1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
    let result = chunk_sum::<4>(&data);
    assert!((result - 36.0).abs() < 1e-6, "chunk_sum: {result}");
}
/// Const-generic methods on a receiver, in both the plain-`self` and the
/// `_self = Type` attribute forms.
struct ConstGenericBuf {
    data: [f32; 4],
}

impl ConstGenericBuf {
    /// Plain `self` + const-generic output length.
    #[autoversion]
    fn extract<const N: usize>(&self, _token: SimdToken) -> [f32; N] {
        let mut out = [0.0f32; N];
        // Copy at most the 4 stored elements; extra slots stay zero.
        let len = N.min(4);
        let mut i = 0;
        while i < len {
            out[i] = self.data[i];
            i += 1;
        }
        out
    }
    /// Same behavior via the `_self = Type` rewrite form.
    #[autoversion(_self = ConstGenericBuf)]
    fn extract_nested<const N: usize>(&self, _token: SimdToken) -> [f32; N] {
        let mut out = [0.0f32; N];
        let len = N.min(4);
        let mut i = 0;
        while i < len {
            out[i] = _self.data[i];
            i += 1;
        }
        out
    }
}

#[test]
fn const_generic_self_receiver() {
    let buf = ConstGenericBuf {
        data: [1.0, 2.0, 3.0, 4.0],
    };
    let result: [f32; 2] = buf.extract();
    assert_eq!(result, [1.0, 2.0]);
}

#[test]
fn const_generic_nested_self() {
    let buf = ConstGenericBuf {
        data: [1.0, 2.0, 3.0, 4.0],
    };
    let result: [f32; 3] = buf.extract_nested();
    assert_eq!(result, [1.0, 2.0, 3.0]);
}
/// Const generic combined with an explicit tier list.
#[autoversion(v3, neon)]
fn const_sum_explicit<const N: usize>(_token: SimdToken, data: &[f32; N]) -> f32 {
    let mut s = 0.0f32;
    let mut i = 0;
    while i < N {
        s += data[i];
        i += 1;
    }
    s
}

#[test]
fn const_generic_explicit_tiers() {
    let data = [1.0f32, 2.0, 3.0];
    let result = const_sum_explicit(&data);
    assert!((result - 6.0).abs() < 1e-6);
}

/// Explicit lifetime alongside a const parameter: the macro must preserve
/// the full generic list order (`'a` before `N`).
#[autoversion]
fn first_n_sum<'a, const N: usize>(_token: SimdToken, data: &'a [f32]) -> f32 {
    let mut s = 0.0f32;
    // Sum at most N elements, fewer if the slice is shorter.
    let end = N.min(data.len());
    let mut i = 0;
    while i < end {
        s += data[i];
        i += 1;
    }
    s
}

#[test]
fn const_generic_with_lifetime() {
    let data = [1.0f32, 2.0, 3.0, 4.0, 5.0];
    let result = first_n_sum::<3>(&data);
    assert!((result - 6.0).abs() < 1e-6);
}
/// Image-processing-style pattern: bytes-per-pixel as a const parameter on
/// methods, in both receiver forms, plus an output buffer argument.
struct PixelRow {
    data: Vec<u8>,
}

impl PixelRow {
    /// Plain `self`, `&mut Vec<u8>` out-parameter, body-only const `BPP`.
    #[autoversion]
    fn fill_row<const BPP: usize>(&self, _token: SimdToken, out: &mut Vec<u8>) {
        for chunk in self.data.chunks(BPP) {
            out.extend_from_slice(chunk);
        }
    }
    /// Same behavior via the `_self = Type` rewrite form.
    #[autoversion(_self = PixelRow)]
    fn fill_row_nested<const BPP: usize>(&self, _token: SimdToken, out: &mut Vec<u8>) {
        for chunk in _self.data.chunks(BPP) {
            out.extend_from_slice(chunk);
        }
    }
}

#[test]
fn const_generic_bpp_pattern() {
    let row = PixelRow {
        data: vec![1, 2, 3, 4, 5, 6],
    };
    let mut out = Vec::new();
    // Copying chunk-by-chunk reproduces the input regardless of BPP.
    row.fill_row::<3>(&mut out);
    assert_eq!(out, vec![1, 2, 3, 4, 5, 6]);
}

#[test]
fn const_generic_bpp_pattern_nested() {
    let row = PixelRow {
        data: vec![1, 2, 3, 4, 5, 6],
    };
    let mut out = Vec::new();
    row.fill_row_nested::<2>(&mut out);
    assert_eq!(out, vec![1, 2, 3, 4, 5, 6]);
}

/// Turbofish works on the generated `_scalar` variant too.
#[test]
fn const_generic_scalar_turbofish() {
    let data = [1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
    let result = chunk_sum_scalar::<4>(ScalarToken, &data);
    assert!((result - 36.0).abs() < 1e-6);
}

/// …and on the generated `_v3` variant.
#[cfg(target_arch = "x86_64")]
#[test]
fn const_generic_v3_turbofish() {
    if let Some(token) = X64V3Token::summon() {
        let data = [1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let result = chunk_sum_v3::<4>(token, &data);
        assert!((result - 36.0).abs() < 1e-6);
    }
}
/// Same computation written with plain `self` and with the `_self = Type`
/// form — the test asserts both spellings produce identical results.
struct Accum {
    bias: f32,
}

impl Accum {
    #[autoversion]
    fn sum_plain(&self, _token: SimdToken, data: &[f32]) -> f32 {
        self.bias + data.iter().sum::<f32>()
    }
    #[autoversion(_self = Accum)]
    fn sum_nested(&self, _token: SimdToken, data: &[f32]) -> f32 {
        _self.bias + data.iter().sum::<f32>()
    }
}

#[test]
fn plain_vs_nested_self_consistent() {
    let a = Accum { bias: 100.0 };
    let data = [1.0f32, 2.0, 3.0];
    let plain = a.sum_plain(&data);
    let nested = a.sum_nested(&data);
    assert!(
        (plain - nested).abs() < 1e-6,
        "plain ({plain}) != nested ({nested})"
    );
    assert!((plain - 106.0).abs() < 1e-6);
}
/// Token-less form: no token parameter at all in the source signature. The
/// tests below still call generated `_scalar`/`_v3` variants, which DO take
/// a token — so the macro evidently injects one into each variant.
#[autoversion]
fn inner_product(a: &[f32], b: &[f32]) -> f32 {
    let mut sum = 0.0f32;
    let len = a.len().min(b.len());
    for i in 0..len {
        sum += a[i] * b[i];
    }
    sum
}

#[test]
fn tokenless_dispatcher_works() {
    let a = [1.0f32, 2.0, 3.0, 4.0];
    let b = [4.0f32, 3.0, 2.0, 1.0];
    let result = inner_product(&a, &b);
    assert!((result - 20.0).abs() < 1e-6, "tokenless: {result}");
}

#[test]
fn tokenless_scalar_variant_callable() {
    let a = [1.0f32, 2.0, 3.0];
    let b = [3.0f32, 2.0, 1.0];
    let result = inner_product_scalar(ScalarToken, &a, &b);
    assert!((result - 10.0).abs() < 1e-6, "scalar: {result}");
}

#[cfg(target_arch = "x86_64")]
#[test]
fn tokenless_v3_variant_callable() {
    if let Some(token) = X64V3Token::summon() {
        let a = [1.0f32, 2.0, 3.0, 4.0];
        let b = [4.0f32, 3.0, 2.0, 1.0];
        let result = inner_product_v3(token, &a, &b);
        assert!((result - 20.0).abs() < 1e-6, "v3: {result}");
    }
}

/// Token-less with an explicit tier list.
#[autoversion(v3, neon)]
fn scale_sum(data: &[f32], factor: f32) -> f32 {
    let mut sum = 0.0f32;
    for &x in data {
        sum += x * factor;
    }
    sum
}

#[test]
fn tokenless_explicit_tiers() {
    let data = [1.0f32, 2.0, 3.0, 4.0];
    let result = scale_sum(&data, 2.0);
    assert!((result - 20.0).abs() < 1e-6, "scale_sum: {result}");
}

/// Token-less with a const-generic parameter.
#[autoversion]
fn fill_chunked<const N: usize>(data: &mut [f32], val: f32) {
    for chunk in data.chunks_mut(N) {
        for x in chunk {
            *x = val;
        }
    }
}

#[test]
fn tokenless_const_generic() {
    let mut data = [0.0f32; 16];
    fill_chunked::<4>(&mut data, 42.0);
    assert!(data.iter().all(|&x| (x - 42.0).abs() < 1e-6));
}
/// Token-less methods across every receiver kind (`&self`, `&mut self`,
/// by-value `self`), borrowing returns, extra params and explicit tiers.
struct TokenlessBuffer {
    data: Vec<f32>,
}

impl TokenlessBuffer {
    #[autoversion]
    fn total(&self) -> f32 {
        self.data.iter().sum()
    }
    #[autoversion]
    fn scale_all(&mut self, factor: f32) {
        for x in self.data.iter_mut() {
            *x *= factor;
        }
    }
    #[autoversion]
    fn into_total(self) -> f32 {
        self.data.iter().sum()
    }
    #[autoversion]
    fn values_ref(&self) -> &[f32] {
        &self.data
    }
    #[autoversion]
    fn weighted_sum(&self, weight: f32) -> f32 {
        self.data.iter().map(|v| v * weight).sum()
    }
    #[autoversion(v3, neon)]
    fn product(&self) -> f32 {
        self.data.iter().product()
    }
}

#[test]
fn tokenless_ref_self() {
    let buf = TokenlessBuffer {
        data: vec![1.0, 2.0, 3.0, 4.0],
    };
    assert!((buf.total() - 10.0).abs() < 1e-6);
}

#[test]
fn tokenless_mut_self() {
    let mut buf = TokenlessBuffer {
        data: vec![1.0, 2.0, 3.0],
    };
    buf.scale_all(3.0);
    assert_eq!(buf.data, vec![3.0, 6.0, 9.0]);
}

#[test]
fn tokenless_owned_self() {
    let buf = TokenlessBuffer {
        data: vec![1.0, 2.0, 3.0, 4.0],
    };
    assert!((buf.into_total() - 10.0).abs() < 1e-6);
}

#[test]
fn tokenless_borrowing_return() {
    let buf = TokenlessBuffer {
        data: vec![1.0, 2.0, 3.0],
    };
    assert_eq!(buf.values_ref(), &[1.0, 2.0, 3.0]);
}

#[test]
fn tokenless_self_with_extra_params() {
    let buf = TokenlessBuffer {
        data: vec![1.0, 2.0, 3.0],
    };
    assert!((buf.weighted_sum(10.0) - 60.0).abs() < 1e-6);
}

#[test]
fn tokenless_self_explicit_tiers() {
    let buf = TokenlessBuffer {
        data: vec![2.0, 3.0, 4.0],
    };
    assert!((buf.product() - 24.0).abs() < 1e-6);
}

/// Generated `_scalar` method variant takes the token the source fn lacked.
#[test]
fn tokenless_scalar_method_variant() {
    let buf = TokenlessBuffer {
        data: vec![1.0, 2.0, 3.0],
    };
    let s = buf.total_scalar(ScalarToken);
    assert!((s - 6.0).abs() < 1e-6);
}

#[cfg(target_arch = "x86_64")]
#[test]
fn tokenless_v3_method_variant() {
    if let Some(token) = X64V3Token::summon() {
        let buf = TokenlessBuffer {
            data: vec![10.0, 20.0, 30.0],
        };
        let result = buf.total_v3(token);
        assert!((result - 60.0).abs() < 1e-6);
    }
}
/// Token-less methods combined with the `_self = Type` rewrite form.
struct TokenlessNested {
    bias: f32,
}

impl TokenlessNested {
    #[autoversion(_self = TokenlessNested)]
    fn biased_sum(&self, data: &[f32]) -> f32 {
        _self.bias + data.iter().sum::<f32>()
    }
    #[autoversion(_self = TokenlessNested)]
    fn biased_scale(&mut self, data: &[f32], factor: f32) -> f32 {
        _self.bias * factor + data.iter().sum::<f32>()
    }
}

#[test]
fn tokenless_nested_ref_self() {
    let n = TokenlessNested { bias: 100.0 };
    let data = [1.0f32, 2.0, 3.0];
    assert!((n.biased_sum(&data) - 106.0).abs() < 1e-6);
}

#[test]
fn tokenless_nested_mut_self() {
    let mut n = TokenlessNested { bias: 10.0 };
    let data = [1.0f32, 2.0, 3.0];
    // 10.0 * 2.0 + (1 + 2 + 3) = 26.0
    assert!((n.biased_scale(&data, 2.0) - 26.0).abs() < 1e-6);
}

/// Parity check: the token-taking and token-less spellings of the same
/// computation must dispatch to equivalent code.
#[autoversion]
fn parity_explicit(_token: SimdToken, data: &[f32]) -> f32 {
    data.iter().map(|x| x * x).sum()
}

#[autoversion]
fn parity_tokenless(data: &[f32]) -> f32 {
    data.iter().map(|x| x * x).sum()
}

#[test]
fn explicit_and_tokenless_produce_same_result() {
    let data: Vec<f32> = (0..256).map(|i| i as f32 * 0.1).collect();
    let explicit = parity_explicit(&data);
    let tokenless = parity_tokenless(&data);
    assert!(
        (explicit - tokenless).abs() < 1e-3,
        "explicit ({explicit}) != tokenless ({tokenless})"
    );
}

#[test]
fn explicit_and_tokenless_scalar_variants_match() {
    let data = [1.0f32, 2.0, 3.0, 4.0];
    let explicit = parity_explicit_scalar(ScalarToken, &data);
    let tokenless = parity_tokenless_scalar(ScalarToken, &data);
    assert!(
        (explicit - tokenless).abs() < 1e-6,
        "scalar: explicit ({explicit}) != tokenless ({tokenless})"
    );
}
/// Hand-written v3 path: `#[arcane]` marks a fn taking a concrete tier token.
/// The * 1000.0 factor makes the v3 path distinguishable from the fallback.
#[arcane]
fn nested_dispatch_v3(_token: X64V3Token, data: &[f32]) -> f32 {
    data.iter().sum::<f32>() * 1000.0
}

/// Inner `#[autoversion]` dispatcher used as the scalar fallback's body.
#[autoversion(v3, neon)]
fn nested_dispatch_fallback(data: &[f32]) -> f32 {
    data.iter().sum()
}

/// Bridge fn: gives `incant!` a `_scalar`-suffixed entry that forwards to the
/// autoversioned fallback (nested dispatch pattern).
fn nested_dispatch_scalar(_: ScalarToken, data: &[f32]) -> f32 {
    nested_dispatch_fallback(data)
}

/// Top-level dispatcher assembled by `incant!` from the `_v3`/`_scalar`
/// suffixed fns above.
fn nested_dispatch(data: &[f32]) -> f32 {
    incant!(nested_dispatch(data), [v3, scalar])
}
/// The assembled dispatcher must produce *some* finite value on every host;
/// which path it took is checked separately below.
#[test]
fn incant_nesting_dispatches_correctly() {
    let data: [f32; 4] = [1.0, 2.0, 3.0, 4.0];
    let result = nested_dispatch(&data);
    assert!(result.is_finite(), "nested dispatch: {result}");
}

/// The hand-written scalar bridge forwards to the autoversioned fallback.
#[test]
fn incant_nesting_scalar_fallback_works() {
    let data: [f32; 4] = [1.0, 2.0, 3.0, 4.0];
    let result = nested_dispatch_scalar(ScalarToken, &data);
    assert!((result - 10.0).abs() < 1e-6, "scalar bridge: {result}");
}

/// The inner autoversioned dispatcher also works when called directly.
#[test]
fn incant_nesting_autoversion_fallback_directly() {
    let data: [f32; 4] = [1.0, 2.0, 3.0, 4.0];
    let result = nested_dispatch_fallback(&data);
    assert!(
        (result - 10.0).abs() < 1e-6,
        "autoversion fallback: {result}"
    );
}

/// …as does its generated `_scalar` variant.
#[test]
fn incant_nesting_fallback_scalar_variant() {
    let data: [f32; 4] = [1.0, 2.0, 3.0, 4.0];
    let result = nested_dispatch_fallback_scalar(ScalarToken, &data);
    assert!(
        (result - 10.0).abs() < 1e-6,
        "fallback scalar variant: {result}"
    );
}

/// On a v3-capable CPU the dispatcher must pick the hand-written v3 path,
/// identified by its distinctive * 1000.0 scaling.
#[cfg(target_arch = "x86_64")]
#[test]
fn incant_nesting_v3_path_identified() {
    let token = match X64V3Token::summon() {
        Some(t) => t,
        None => return,
    };
    let data: [f32; 4] = [1.0, 2.0, 3.0, 4.0];
    let v3_result = nested_dispatch_v3(token, &data);
    assert!(
        (v3_result - 10000.0).abs() < 1e-3,
        "hand-written v3: {v3_result}"
    );
    let dispatched = nested_dispatch(&data);
    assert!(
        (dispatched - 10000.0).abs() < 1e-3,
        "dispatched should pick v3: {dispatched}"
    );
}
/// Method-based nested dispatch: a hand-rolled `process` entry point that
/// summons a v3 token itself, with an `#[arcane]` v3 method, a scalar bridge
/// method and an inner `#[autoversion]` method.
struct NestedProcessor {
    scale: f32,
}

impl NestedProcessor {
    /// Manual dispatcher: try v3 first on x86_64, otherwise scalar bridge.
    pub fn process(&self, data: &[f32]) -> f32 {
        #[cfg(target_arch = "x86_64")]
        if let Some(token) = X64V3Token::summon() {
            return self.process_v3(token, data);
        }
        self.process_scalar(ScalarToken, data)
    }
    /// Distinctive * 100.0 marks results that came through the v3 path.
    #[arcane]
    fn process_v3(&self, _token: X64V3Token, data: &[f32]) -> f32 {
        data.iter().sum::<f32>() * self.scale * 100.0
    }
    /// Scalar bridge forwards into the autoversioned inner dispatcher.
    fn process_scalar(&self, _: ScalarToken, data: &[f32]) -> f32 {
        self.process_auto(data)
    }
    #[autoversion(v3, neon)]
    fn process_auto(&self, data: &[f32]) -> f32 {
        data.iter().sum::<f32>() * self.scale
    }
}
/// The method-style nested dispatcher returns a finite value on any host.
#[test]
fn incant_nesting_method_dispatches() {
    let data: [f32; 3] = [1.0, 2.0, 3.0];
    let result = NestedProcessor { scale: 2.0 }.process(&data);
    assert!(result.is_finite(), "method nested dispatch: {result}");
}

/// The scalar bridge method yields (1 + 2 + 3) * 2.0 = 12.0.
#[test]
fn incant_nesting_method_scalar_bridge() {
    let processor = NestedProcessor { scale: 2.0 };
    let data: [f32; 3] = [1.0, 2.0, 3.0];
    let result = processor.process_scalar(ScalarToken, &data);
    assert!(
        (result - 12.0).abs() < 1e-6,
        "method scalar bridge: {result}"
    );
}

/// The inner autoversioned method is also callable directly: 6.0 * 3.0.
#[test]
fn incant_nesting_method_auto_directly() {
    let processor = NestedProcessor { scale: 3.0 };
    let data: [f32; 3] = [1.0, 2.0, 3.0];
    let result = processor.process_auto(&data);
    assert!(
        (result - 18.0).abs() < 1e-6,
        "method auto dispatch: {result}"
    );
}
/// "Bridgeless" variant of the nesting pattern: instead of a separate bridge
/// fn, the `#[autoversion]` fn itself takes `ScalarToken`, so its dispatcher
/// doubles as the `_scalar` entry for `incant!`.
#[arcane]
fn bridgeless_v3(_token: X64V3Token, data: &[f32]) -> f32 {
    data.iter().sum::<f32>() * 1000.0
}

#[autoversion(v3, neon)]
fn bridgeless_scalar(_: ScalarToken, data: &[f32]) -> f32 {
    data.iter().sum()
}

fn bridgeless(data: &[f32]) -> f32 {
    incant!(bridgeless(data), [v3, scalar])
}
/// The bridgeless dispatcher returns a finite value on any host.
#[test]
fn scalar_token_nesting_dispatches() {
    let data: [f32; 4] = [1.0, 2.0, 3.0, 4.0];
    let result = bridgeless(&data);
    assert!(result.is_finite(), "bridgeless dispatch: {result}");
}

/// The ScalarToken-taking autoversioned fn is directly callable.
#[test]
fn scalar_token_nesting_scalar_directly() {
    let data: [f32; 4] = [1.0, 2.0, 3.0, 4.0];
    let result = bridgeless_scalar(ScalarToken, &data);
    assert!((result - 10.0).abs() < 1e-6, "scalar direct: {result}");
}

/// Its generated `_scalar` variant (double suffix) works too.
#[test]
fn scalar_token_nesting_scalar_variant() {
    let data: [f32; 4] = [1.0, 2.0, 3.0, 4.0];
    let result = bridgeless_scalar_scalar(ScalarToken, &data);
    assert!((result - 10.0).abs() < 1e-6, "scalar variant: {result}");
}

/// On v3-capable hardware, dispatch must hit the * 1000.0 hand-written path.
#[cfg(target_arch = "x86_64")]
#[test]
fn scalar_token_nesting_v3_path() {
    let token = match X64V3Token::summon() {
        Some(t) => t,
        None => return,
    };
    let data: [f32; 4] = [1.0, 2.0, 3.0, 4.0];
    let v3_result = bridgeless_v3(token, &data);
    assert!(
        (v3_result - 10000.0).abs() < 1e-3,
        "hand-written v3: {v3_result}"
    );
    let dispatched = bridgeless(&data);
    assert!(
        (dispatched - 10000.0).abs() < 1e-3,
        "should pick v3: {dispatched}"
    );
}
/// Method-based version of the bridgeless pattern: `process_scalar` is both
/// the ScalarToken entry point and an `#[autoversion]` dispatcher.
struct BridgelessProcessor {
    scale: f32,
}

impl BridgelessProcessor {
    /// Manual dispatch: prefer the v3 method when a token can be summoned.
    pub fn process(&self, data: &[f32]) -> f32 {
        #[cfg(target_arch = "x86_64")]
        if let Some(token) = X64V3Token::summon() {
            return self.process_v3(token, data);
        }
        self.process_scalar(ScalarToken, data)
    }
    /// Distinctive * 100.0 marks the v3 path.
    #[arcane]
    fn process_v3(&self, _token: X64V3Token, data: &[f32]) -> f32 {
        data.iter().sum::<f32>() * self.scale * 100.0
    }
    #[autoversion(v3, neon)]
    fn process_scalar(&self, _: ScalarToken, data: &[f32]) -> f32 {
        data.iter().sum::<f32>() * self.scale
    }
}
/// The bridgeless method dispatcher produces a finite value everywhere.
#[test]
fn scalar_token_nesting_method() {
    let data: [f32; 3] = [1.0, 2.0, 3.0];
    let result = BridgelessProcessor { scale: 2.0 }.process(&data);
    assert!(result.is_finite(), "method bridgeless: {result}");
}

/// Direct scalar call: (1 + 2 + 3) * 2.0 = 12.0.
#[test]
fn scalar_token_nesting_method_scalar_directly() {
    let processor = BridgelessProcessor { scale: 2.0 };
    let data: [f32; 3] = [1.0, 2.0, 3.0];
    let result = processor.process_scalar(ScalarToken, &data);
    assert!(
        (result - 12.0).abs() < 1e-6,
        "method scalar direct: {result}"
    );
}
/// Third solution to the nesting pitfall: use the `default` tier name in
/// `incant!` so the fallback needs no token parameter at all.
#[arcane]
fn default_tier_v3(_token: X64V3Token, data: &[f32]) -> f32 {
    data.iter().sum::<f32>() * 1000.0
}

/// Token-less fallback; `incant!` picks it up via the `_default` suffix.
#[autoversion(v3, neon)]
fn default_tier_default(data: &[f32]) -> f32 {
    data.iter().sum()
}

fn default_tier(data: &[f32]) -> f32 {
    incant!(default_tier(data), [v3, default])
}

#[test]
fn default_tier_dispatches() {
    let data = [1.0f32, 2.0, 3.0, 4.0];
    let result = default_tier(&data);
    assert!(result.is_finite(), "default tier: {result}");
}

#[test]
fn default_tier_fallback_directly() {
    let data = [1.0f32, 2.0, 3.0, 4.0];
    let result = default_tier_default(&data);
    assert!((result - 10.0).abs() < 1e-6, "default fallback: {result}");
}

/// On v3 hardware the * 1000.0 hand-written path must win over `default`.
#[cfg(target_arch = "x86_64")]
#[test]
fn default_tier_picks_v3() {
    if let Some(token) = X64V3Token::summon() {
        let data = [1.0f32, 2.0, 3.0, 4.0];
        let v3 = default_tier_v3(token, &data);
        assert!((v3 - 10000.0).abs() < 1e-3, "v3: {v3}");
        let dispatched = default_tier(&data);
        assert!(
            (dispatched - 10000.0).abs() < 1e-3,
            "dispatch: {dispatched}"
        );
    }
}

/// `default` can also be listed directly as an `#[autoversion]` tier,
/// producing a callable `_default` variant.
#[autoversion(v3, neon, default)]
fn auto_with_default(data: &[f32]) -> f32 {
    data.iter().sum()
}

#[test]
fn autoversion_default_tier() {
    let data = [1.0f32, 2.0, 3.0];
    let result = auto_with_default(&data);
    assert!((result - 6.0).abs() < 1e-6, "auto default: {result}");
}

#[test]
fn autoversion_default_variant_callable() {
    let data = [1.0f32, 2.0, 3.0];
    let result = auto_with_default_default(&data);
    assert!((result - 6.0).abs() < 1e-6, "default variant: {result}");
}
/// Method-based default-tier pattern: the fallback method takes no token.
struct DefaultProcessor {
    scale: f32,
}

impl DefaultProcessor {
    /// Manual dispatch: v3 when available, token-less default otherwise.
    pub fn process(&self, data: &[f32]) -> f32 {
        #[cfg(target_arch = "x86_64")]
        if let Some(token) = X64V3Token::summon() {
            return self.process_v3(token, data);
        }
        self.process_default(data)
    }
    /// Distinctive * 100.0 marks the v3 path.
    #[arcane]
    fn process_v3(&self, _token: X64V3Token, data: &[f32]) -> f32 {
        data.iter().sum::<f32>() * self.scale * 100.0
    }
    #[autoversion(v3, neon)]
    fn process_default(&self, data: &[f32]) -> f32 {
        data.iter().sum::<f32>() * self.scale
    }
}

#[test]
fn default_tier_method() {
    let p = DefaultProcessor { scale: 2.0 };
    let data = [1.0f32, 2.0, 3.0];
    let result = p.process(&data);
    assert!(result.is_finite(), "method default: {result}");
}

#[test]
fn default_tier_method_fallback() {
    let p = DefaultProcessor { scale: 2.0 };
    let data = [1.0f32, 2.0, 3.0];
    let result = p.process_default(&data);
    assert!((result - 12.0).abs() < 1e-6, "method default: {result}");
}
/// The three documented solutions to the nested-dispatch pitfall, side by
/// side, each with a hand-written `#[arcane]` v3 path (* 1000.0 marker).
///
/// Solution 1: the autoversioned fallback itself takes `ScalarToken`.
#[arcane]
fn resample_scalar_sol1_v3(_: X64V3Token, data: &[f32], factor: f32) -> f32 {
    data.iter().map(|x| x * factor).sum::<f32>() * 1000.0
}

#[autoversion(v3, neon)]
fn resample_scalar_sol1_scalar(_: ScalarToken, data: &[f32], factor: f32) -> f32 {
    data.iter().map(|x| x * factor).sum()
}

fn resample_scalar_sol1(data: &[f32], factor: f32) -> f32 {
    incant!(resample_scalar_sol1(data, factor), [v3, scalar])
}

#[test]
fn nesting_pitfall_scalar_token_solution() {
    let data = [1.0f32, 2.0, 3.0, 4.0];
    let result = resample_scalar_sol1(&data, 2.0);
    assert!(result.is_finite(), "ScalarToken solution: {result}");
    let scalar = resample_scalar_sol1_scalar(ScalarToken, &data, 2.0);
    assert!((scalar - 20.0).abs() < 1e-6, "direct scalar: {scalar}");
}

/// Solution 2: use the token-less `default` tier in `incant!`.
#[arcane]
fn resample_default_sol2_v3(_: X64V3Token, data: &[f32], factor: f32) -> f32 {
    data.iter().map(|x| x * factor).sum::<f32>() * 1000.0
}

#[autoversion(v3, neon)]
fn resample_default_sol2_default(data: &[f32], factor: f32) -> f32 {
    data.iter().map(|x| x * factor).sum()
}

fn resample_default_sol2(data: &[f32], factor: f32) -> f32 {
    incant!(resample_default_sol2(data, factor), [v3, default])
}

#[test]
fn nesting_pitfall_default_tier_solution() {
    let data = [1.0f32, 2.0, 3.0, 4.0];
    let result = resample_default_sol2(&data, 2.0);
    assert!(result.is_finite(), "default tier solution: {result}");
    let fallback = resample_default_sol2_default(&data, 2.0);
    assert!((fallback - 20.0).abs() < 1e-6, "direct default: {fallback}");
}

/// Solution 3: a hand-written scalar bridge forwarding to the autoversioned
/// dispatcher.
#[arcane]
fn resample_bridge_sol3_v3(_: X64V3Token, data: &[f32], factor: f32) -> f32 {
    data.iter().map(|x| x * factor).sum::<f32>() * 1000.0
}

#[autoversion(v3, neon)]
fn resample_bridge_sol3_auto(data: &[f32], factor: f32) -> f32 {
    data.iter().map(|x| x * factor).sum()
}

fn resample_bridge_sol3_scalar(_: ScalarToken, data: &[f32], factor: f32) -> f32 {
    resample_bridge_sol3_auto(data, factor)
}

fn resample_bridge_sol3(data: &[f32], factor: f32) -> f32 {
    incant!(resample_bridge_sol3(data, factor), [v3, scalar])
}

#[test]
fn nesting_pitfall_bridge_solution() {
    let data = [1.0f32, 2.0, 3.0, 4.0];
    let result = resample_bridge_sol3(&data, 2.0);
    assert!(result.is_finite(), "bridge solution: {result}");
}

/// All three solutions' fallback paths must compute the same value.
#[test]
fn nesting_pitfall_all_solutions_agree() {
    let data = [1.0f32, 2.0, 3.0, 4.0];
    let factor = 2.0;
    let s1 = resample_scalar_sol1_scalar(ScalarToken, &data, factor);
    let s2 = resample_default_sol2_default(&data, factor);
    let s3 = resample_bridge_sol3_auto(&data, factor);
    assert!((s1 - s2).abs() < 1e-6, "sol1 ({s1}) != sol2 ({s2})");
    assert!((s2 - s3).abs() < 1e-6, "sol2 ({s2}) != sol3 ({s3})");
}