use ferray_core::Array;
use ferray_core::dimension::broadcast::broadcast_shapes;
use ferray_core::dimension::{Dimension, IxDyn};
use ferray_core::dtype::Element;
use ferray_core::error::{FerrayError, FerrayResult};
use rayon::prelude::*;
use crate::parallel::THRESHOLD_COMPUTE_BOUND;
/// Reinterprets an owned `Array<SRC, D>` as an `Array<DST, D>` without touching its elements.
///
/// The input array is parked inside a `ManuallyDrop` so that its destructor never runs,
/// and its bytes are then read back out as the destination array type.
///
/// Debug builds assert that `SRC` and `DST` have the same size and alignment; release
/// builds perform no check at all.
///
/// # Safety
///
/// The caller must guarantee that `Array<SRC, D>` and `Array<DST, D>` have compatible
/// layouts and that every `SRC` value held by `arr` is also a valid `DST`.
/// NOTE(review): presumably only used when `SRC` and `DST` are the same type (e.g. after
/// a `TypeId` check) — confirm against the call sites.
#[inline]
pub(crate) unsafe fn reinterpret_array<SRC, DST, D>(arr: Array<SRC, D>) -> Array<DST, D>
where
    SRC: Element,
    DST: Element,
    D: Dimension,
{
    debug_assert_eq!(std::mem::size_of::<SRC>(), std::mem::size_of::<DST>());
    debug_assert_eq!(std::mem::align_of::<SRC>(), std::mem::align_of::<DST>());

    // Ownership moves into the bitwise copy below, so the source must never be dropped.
    let source = std::mem::ManuallyDrop::new(arr);

    // SAFETY: upheld by the caller (see `# Safety`); `source` is never used again, so the
    // returned array is the sole owner of the underlying storage.
    unsafe {
        std::mem::transmute_copy::<std::mem::ManuallyDrop<Array<SRC, D>>, Array<DST, D>>(&source)
    }
}
/// Chunk length (in elements) used when the parallel paths in this file split their
/// input and output slices with `par_chunks` / `par_chunks_mut`.
const PARALLEL_CHUNK: usize = 65_536;
/// Writes `f(src[i])` into `out[i]` for every index, going parallel for large inputs.
///
/// * `src` / `out` — input and output slices; debug builds assert they have equal length.
/// * `threshold` — inputs with at least this many elements are processed with rayon in
///   chunks of `PARALLEL_CHUNK` elements; shorter inputs are processed on the calling thread.
/// * `f` — the element-wise function.
#[inline]
fn parallel_unary_fill_threshold<T, U, F>(src: &[T], out: &mut [U], threshold: usize, f: F)
where
    T: Copy + Sync,
    U: Send,
    F: Fn(T) -> U + Sync + Send,
{
    debug_assert_eq!(out.len(), src.len());

    // Small inputs: a plain sequential pass.
    if src.len() < threshold {
        out.iter_mut()
            .zip(src)
            .for_each(|(slot, &value)| *slot = f(value));
        return;
    }

    // Large inputs: both slices are cut with the same chunk length, so the i-th output
    // chunk lines up with the i-th input chunk.
    out.par_chunks_mut(PARALLEL_CHUNK)
        .zip(src.par_chunks(PARALLEL_CHUNK))
        .for_each(|(dst_chunk, src_chunk)| {
            dst_chunk
                .iter_mut()
                .zip(src_chunk)
                .for_each(|(slot, &value)| *slot = f(value));
        });
}
/// Returns the elements of `input` as one contiguous slice, copying only when necessary.
///
/// When `input.as_slice()` succeeds the slice is borrowed as-is; otherwise the elements
/// are collected from `input.iter()` into a freshly allocated `Vec` (the tests in this
/// file exercise that path with a Fortran-order array).
#[inline]
pub(crate) fn contig_input<T, D>(input: &Array<T, D>) -> std::borrow::Cow<'_, [T]>
where
    T: Element + Copy,
    D: Dimension,
{
    match input.as_slice() {
        Some(contiguous) => std::borrow::Cow::Borrowed(contiguous),
        None => std::borrow::Cow::Owned(input.iter().copied().collect()),
    }
}
/// Applies `f` to every element of `input` and returns a new array with the same dimension.
///
/// Elements are read through [`contig_input`], so non-contiguous inputs are materialized
/// first; the output is built from a flat `Vec` via `Array::from_vec`.
///
/// # Errors
///
/// Returns whatever error `Array::from_vec` reports.
#[inline]
pub fn unary_float_op<T, D>(input: &Array<T, D>, f: impl Fn(T) -> T) -> FerrayResult<Array<T, D>>
where
    T: Element + Copy,
    D: Dimension,
{
    let src = contig_input(input);
    // Collecting from an exact-size iterator allocates once and initializes each slot
    // exactly once. This replaces the previous `set_len` over uninitialized storage,
    // which violated the `Vec::set_len` safety contract.
    let data: Vec<T> = src.iter().map(|&x| f(x)).collect();
    Array::from_vec(input.dim().clone(), data)
}
/// Applies `f` to every element of `input`, using rayon once the input holds at least
/// `THRESHOLD_COMPUTE_BOUND` elements, and returns a new array with the same dimension.
///
/// Elements are read through [`contig_input`], so non-contiguous inputs are materialized
/// first.
///
/// # Errors
///
/// Returns whatever error `Array::from_vec` reports.
#[inline]
pub fn unary_float_op_compute<T, D>(
    input: &Array<T, D>,
    f: impl Fn(T) -> T + Sync + Send,
) -> FerrayResult<Array<T, D>>
where
    T: Element + Copy,
    D: Dimension,
{
    let src = contig_input(input);
    let n = src.len();
    let mut data: Vec<T> = Vec::with_capacity(n);

    // Fill the spare capacity through `MaybeUninit` slots so no `&mut T` ever points at
    // uninitialized memory (the previous code called `set_len` before writing, which
    // violates the `Vec::set_len` contract). `with_capacity` may over-allocate, hence
    // the explicit `[..n]`.
    parallel_unary_fill_threshold(
        &src,
        &mut data.spare_capacity_mut()[..n],
        THRESHOLD_COMPUTE_BOUND,
        |x| std::mem::MaybeUninit::new(f(x)),
    );

    // SAFETY: `n <= capacity`, and the fill above wrote every one of the first `n` slots
    // (source and destination slices both have length `n`).
    unsafe {
        data.set_len(n);
    }
    Array::from_vec(input.dim().clone(), data)
}
/// Runs a slice-to-slice `f64` kernel over `input` and returns a new array with the
/// same dimension.
///
/// The input is read through [`contig_input`]; the kernel is dispatched by
/// [`run_slice_kernel_f64`], which may invoke it once per chunk on large inputs.
///
/// # Errors
///
/// Returns whatever error `Array::from_vec` reports.
#[inline]
pub fn unary_slice_op_f64<D>(
    input: &Array<f64, D>,
    kernel: fn(&[f64], &mut [f64]),
) -> FerrayResult<Array<f64, D>>
where
    D: Dimension,
{
    let src = contig_input(input);
    // The kernel is an arbitrary function pointer receiving `&mut [f64]`, so the buffer
    // must be initialized: handing it uninitialized memory (as the previous `set_len`
    // did) is undefined behavior as soon as the kernel reads a slot. Zero-filling via
    // `vec!` is a single zeroed allocation.
    let mut data = vec![0.0_f64; src.len()];
    run_slice_kernel_f64(&src, &mut data, kernel);
    Array::from_vec(input.dim().clone(), data)
}
/// Runs a slice-to-slice `f32` kernel over `input` and returns a new array with the
/// same dimension.
///
/// The input is read through [`contig_input`]; the kernel is dispatched by
/// [`run_slice_kernel_f32`], which may invoke it once per chunk on large inputs.
///
/// # Errors
///
/// Returns whatever error `Array::from_vec` reports.
#[inline]
pub fn unary_slice_op_f32<D>(
    input: &Array<f32, D>,
    kernel: fn(&[f32], &mut [f32]),
) -> FerrayResult<Array<f32, D>>
where
    D: Dimension,
{
    let src = contig_input(input);
    // The kernel is an arbitrary function pointer receiving `&mut [f32]`, so the buffer
    // must be initialized: handing it uninitialized memory (as the previous `set_len`
    // did) is undefined behavior as soon as the kernel reads a slot. Zero-filling via
    // `vec!` is a single zeroed allocation.
    let mut data = vec![0.0_f32; src.len()];
    run_slice_kernel_f32(&src, &mut data, kernel);
    Array::from_vec(input.dim().clone(), data)
}
/// Invokes `kernel(src, out)`, splitting the work across rayon for large inputs.
///
/// Inputs shorter than `THRESHOLD_COMPUTE_BOUND` get a single kernel call over the whole
/// slices. Otherwise both slices are cut into `PARALLEL_CHUNK`-sized pieces and the
/// kernel is called once per matching pair, so it must produce correct results when it
/// only sees part of the data. Debug builds assert `src` and `out` have equal length.
#[inline]
fn run_slice_kernel_f64(src: &[f64], out: &mut [f64], kernel: fn(&[f64], &mut [f64])) {
    debug_assert_eq!(out.len(), src.len());

    if src.len() < THRESHOLD_COMPUTE_BOUND {
        kernel(src, out);
        return;
    }

    out.par_chunks_mut(PARALLEL_CHUNK)
        .zip(src.par_chunks(PARALLEL_CHUNK))
        .for_each(|(dst_chunk, src_chunk)| kernel(src_chunk, dst_chunk));
}
/// Invokes `kernel(src, out)`, splitting the work across rayon for large inputs.
///
/// Inputs shorter than `THRESHOLD_COMPUTE_BOUND` get a single kernel call over the whole
/// slices. Otherwise both slices are cut into `PARALLEL_CHUNK`-sized pieces and the
/// kernel is called once per matching pair, so it must produce correct results when it
/// only sees part of the data. Debug builds assert `src` and `out` have equal length.
#[inline]
fn run_slice_kernel_f32(src: &[f32], out: &mut [f32], kernel: fn(&[f32], &mut [f32])) {
    debug_assert_eq!(out.len(), src.len());

    if src.len() < THRESHOLD_COMPUTE_BOUND {
        kernel(src, out);
        return;
    }

    out.par_chunks_mut(PARALLEL_CHUNK)
        .zip(src.par_chunks(PARALLEL_CHUNK))
        .for_each(|(dst_chunk, src_chunk)| kernel(src_chunk, dst_chunk));
}
/// Runs an `f64` slice kernel over `input` when the element type `T` is exactly `f64`.
///
/// Returns `None` when `T` is not `f64`, so the caller can fall back to another path.
/// Otherwise returns `Some` with the result of building the output array, which has the
/// same dimension as `input`. The input is read through [`contig_input`], and the kernel
/// is dispatched by [`run_slice_kernel_f64`].
#[inline]
pub fn try_simd_f64_unary<T, D>(
    input: &Array<T, D>,
    kernel: fn(&[f64], &mut [f64]),
) -> Option<FerrayResult<Array<T, D>>>
where
    T: Element + Copy,
    D: Dimension,
{
    use std::any::TypeId;
    if TypeId::of::<T>() != TypeId::of::<f64>() {
        return None;
    }
    let src_t = contig_input(input);
    // Take the length from the buffer we actually view, so the raw slice below can never
    // extend past it.
    let n = src_t.len();
    // SAFETY: `T` is `f64` (checked above), so the pointer is a valid, aligned `*const f64`
    // covering exactly `n` initialized elements borrowed from `src_t`.
    let src_f64: &[f64] = unsafe { std::slice::from_raw_parts(src_t.as_ptr().cast::<f64>(), n) };
    // The kernel receives `&mut [f64]`, so the buffer must be initialized; the previous
    // `set_len` over uninitialized capacity violated the `Vec::set_len` contract.
    let mut output = vec![0.0_f64; n];
    run_slice_kernel_f64(src_f64, &mut output, kernel);
    let cap = output.capacity();
    // SAFETY: `T` is `f64`, so `Vec<f64>` and `Vec<T>` are the same type. Pointer, length
    // and capacity all come from `output`, whose ownership is released via `ManuallyDrop`.
    let t_vec: Vec<T> = unsafe {
        let mut md = std::mem::ManuallyDrop::new(output);
        Vec::from_raw_parts(md.as_mut_ptr().cast::<T>(), n, cap)
    };
    Some(Array::from_vec(input.dim().clone(), t_vec))
}
/// Runs an `f32` slice kernel over `input` when the element type `T` is exactly `f32`.
///
/// Returns `None` when `T` is not `f32`, so the caller can fall back to another path.
/// Otherwise returns `Some` with the result of building the output array, which has the
/// same dimension as `input`. The input is read through [`contig_input`], and the kernel
/// is dispatched by [`run_slice_kernel_f32`].
#[inline]
pub fn try_simd_f32_unary<T, D>(
    input: &Array<T, D>,
    kernel: fn(&[f32], &mut [f32]),
) -> Option<FerrayResult<Array<T, D>>>
where
    T: Element + Copy,
    D: Dimension,
{
    use std::any::TypeId;
    if TypeId::of::<T>() != TypeId::of::<f32>() {
        return None;
    }
    let src_t = contig_input(input);
    // Take the length from the buffer we actually view, so the raw slice below can never
    // extend past it.
    let n = src_t.len();
    // SAFETY: `T` is `f32` (checked above), so the pointer is a valid, aligned `*const f32`
    // covering exactly `n` initialized elements borrowed from `src_t`.
    let src_f32: &[f32] = unsafe { std::slice::from_raw_parts(src_t.as_ptr().cast::<f32>(), n) };
    // The kernel receives `&mut [f32]`, so the buffer must be initialized; the previous
    // `set_len` over uninitialized capacity violated the `Vec::set_len` contract.
    let mut output = vec![0.0_f32; n];
    run_slice_kernel_f32(src_f32, &mut output, kernel);
    let cap = output.capacity();
    // SAFETY: `T` is `f32`, so `Vec<f32>` and `Vec<T>` are the same type. Pointer, length
    // and capacity all come from `output`, whose ownership is released via `ManuallyDrop`.
    let t_vec: Vec<T> = unsafe {
        let mut md = std::mem::ManuallyDrop::new(output);
        Vec::from_raw_parts(md.as_mut_ptr().cast::<T>(), n, cap)
    };
    Some(Array::from_vec(input.dim().clone(), t_vec))
}
/// Invokes `kernel(a, b, out)`, splitting the work across rayon for large inputs.
///
/// Inputs shorter than `THRESHOLD_COMPUTE_BOUND` (measured on `a`) get a single kernel
/// call over the whole slices. Otherwise all three slices are cut into
/// `PARALLEL_CHUNK`-sized pieces and the kernel is called once per matching triple, so it
/// must produce correct results when it only sees part of the data. Debug builds assert
/// that `b` and `out` have the same length as `a`.
#[inline]
fn run_slice_kernel_binary_f64(
    a: &[f64],
    b: &[f64],
    out: &mut [f64],
    kernel: fn(&[f64], &[f64], &mut [f64]),
) {
    let len = a.len();
    debug_assert_eq!(b.len(), len);
    debug_assert_eq!(out.len(), len);

    if len < THRESHOLD_COMPUTE_BOUND {
        kernel(a, b, out);
        return;
    }

    out.par_chunks_mut(PARALLEL_CHUNK)
        .zip(a.par_chunks(PARALLEL_CHUNK))
        .zip(b.par_chunks(PARALLEL_CHUNK))
        .for_each(|((dst_chunk, lhs_chunk), rhs_chunk)| kernel(lhs_chunk, rhs_chunk, dst_chunk));
}
/// Invokes `kernel(a, b, out)`, splitting the work across rayon for large inputs.
///
/// Inputs shorter than `THRESHOLD_COMPUTE_BOUND` (measured on `a`) get a single kernel
/// call over the whole slices. Otherwise all three slices are cut into
/// `PARALLEL_CHUNK`-sized pieces and the kernel is called once per matching triple, so it
/// must produce correct results when it only sees part of the data. Debug builds assert
/// that `b` and `out` have the same length as `a`.
#[inline]
fn run_slice_kernel_binary_f32(
    a: &[f32],
    b: &[f32],
    out: &mut [f32],
    kernel: fn(&[f32], &[f32], &mut [f32]),
) {
    let len = a.len();
    debug_assert_eq!(b.len(), len);
    debug_assert_eq!(out.len(), len);

    if len < THRESHOLD_COMPUTE_BOUND {
        kernel(a, b, out);
        return;
    }

    out.par_chunks_mut(PARALLEL_CHUNK)
        .zip(a.par_chunks(PARALLEL_CHUNK))
        .zip(b.par_chunks(PARALLEL_CHUNK))
        .for_each(|((dst_chunk, lhs_chunk), rhs_chunk)| kernel(lhs_chunk, rhs_chunk, dst_chunk));
}
/// Runs a binary `f64` slice kernel over `a` and `b` when `T` is exactly `f64` and both
/// arrays have the same shape.
///
/// Returns `None` when `T` is not `f64` or when the shapes differ (no broadcasting is
/// attempted here), so the caller can fall back to another path. Otherwise returns `Some`
/// with the result of building the output array, which has the dimension of `a`. Inputs
/// are read through [`contig_input`], and the kernel is dispatched by
/// [`run_slice_kernel_binary_f64`].
#[inline]
pub fn try_simd_f64_binary<T, D>(
    a: &Array<T, D>,
    b: &Array<T, D>,
    kernel: fn(&[f64], &[f64], &mut [f64]),
) -> Option<FerrayResult<Array<T, D>>>
where
    T: Element + Copy,
    D: Dimension,
{
    use std::any::TypeId;
    if TypeId::of::<T>() != TypeId::of::<f64>() {
        return None;
    }
    if a.shape() != b.shape() {
        return None;
    }
    let a_src = contig_input(a);
    let b_src = contig_input(b);
    // Output length follows `a`; each raw slice below uses the length of the buffer it
    // views, so neither can extend past its backing storage.
    let n = a_src.len();
    // SAFETY: `T` is `f64` (checked above), so each pointer is a valid, aligned
    // `*const f64` covering exactly the initialized elements of its source buffer.
    let a_f64: &[f64] =
        unsafe { std::slice::from_raw_parts(a_src.as_ptr().cast::<f64>(), a_src.len()) };
    let b_f64: &[f64] =
        unsafe { std::slice::from_raw_parts(b_src.as_ptr().cast::<f64>(), b_src.len()) };
    // The kernel receives `&mut [f64]`, so the buffer must be initialized; the previous
    // `set_len` over uninitialized capacity violated the `Vec::set_len` contract.
    let mut output = vec![0.0_f64; n];
    run_slice_kernel_binary_f64(a_f64, b_f64, &mut output, kernel);
    let cap = output.capacity();
    // SAFETY: `T` is `f64`, so `Vec<f64>` and `Vec<T>` are the same type. Pointer, length
    // and capacity all come from `output`, whose ownership is released via `ManuallyDrop`.
    let t_vec: Vec<T> = unsafe {
        let mut md = std::mem::ManuallyDrop::new(output);
        Vec::from_raw_parts(md.as_mut_ptr().cast::<T>(), n, cap)
    };
    Some(Array::from_vec(a.dim().clone(), t_vec))
}
/// Runs a binary `f32` slice kernel over `a` and `b` when `T` is exactly `f32` and both
/// arrays have the same shape.
///
/// Returns `None` when `T` is not `f32` or when the shapes differ (no broadcasting is
/// attempted here), so the caller can fall back to another path. Otherwise returns `Some`
/// with the result of building the output array, which has the dimension of `a`. Inputs
/// are read through [`contig_input`], and the kernel is dispatched by
/// [`run_slice_kernel_binary_f32`].
#[inline]
pub fn try_simd_f32_binary<T, D>(
    a: &Array<T, D>,
    b: &Array<T, D>,
    kernel: fn(&[f32], &[f32], &mut [f32]),
) -> Option<FerrayResult<Array<T, D>>>
where
    T: Element + Copy,
    D: Dimension,
{
    use std::any::TypeId;
    if TypeId::of::<T>() != TypeId::of::<f32>() {
        return None;
    }
    if a.shape() != b.shape() {
        return None;
    }
    let a_src = contig_input(a);
    let b_src = contig_input(b);
    // Output length follows `a`; each raw slice below uses the length of the buffer it
    // views, so neither can extend past its backing storage.
    let n = a_src.len();
    // SAFETY: `T` is `f32` (checked above), so each pointer is a valid, aligned
    // `*const f32` covering exactly the initialized elements of its source buffer.
    let a_f32: &[f32] =
        unsafe { std::slice::from_raw_parts(a_src.as_ptr().cast::<f32>(), a_src.len()) };
    let b_f32: &[f32] =
        unsafe { std::slice::from_raw_parts(b_src.as_ptr().cast::<f32>(), b_src.len()) };
    // The kernel receives `&mut [f32]`, so the buffer must be initialized; the previous
    // `set_len` over uninitialized capacity violated the `Vec::set_len` contract.
    let mut output = vec![0.0_f32; n];
    run_slice_kernel_binary_f32(a_f32, b_f32, &mut output, kernel);
    let cap = output.capacity();
    // SAFETY: `T` is `f32`, so `Vec<f32>` and `Vec<T>` are the same type. Pointer, length
    // and capacity all come from `output`, whose ownership is released via `ManuallyDrop`.
    let t_vec: Vec<T> = unsafe {
        let mut md = std::mem::ManuallyDrop::new(output);
        Vec::from_raw_parts(md.as_mut_ptr().cast::<T>(), n, cap)
    };
    Some(Array::from_vec(a.dim().clone(), t_vec))
}
/// Maps every element of `input` through `f`, producing an array of a possibly different
/// element type with the same dimension.
///
/// Elements are read through [`contig_input`], so non-contiguous inputs are materialized
/// first.
///
/// # Errors
///
/// Returns whatever error `Array::from_vec` reports.
#[inline]
pub fn unary_map_op<T, U, D>(input: &Array<T, D>, f: impl Fn(T) -> U) -> FerrayResult<Array<U, D>>
where
    T: Element + Copy,
    U: Element,
    D: Dimension,
{
    let elements = contig_input(input);
    let mapped: Vec<U> = elements.iter().copied().map(f).collect();
    Array::from_vec(input.dim().clone(), mapped)
}
#[inline]
pub fn binary_elementwise_op<T, D>(
a: &Array<T, D>,
b: &Array<T, D>,
f: impl Fn(T, T) -> T,
) -> FerrayResult<Array<T, D>>
where
T: Element + Copy,
D: Dimension,
{
if a.shape() == b.shape() {
let a_src = contig_input(a);
let b_src = contig_input(b);
let data: Vec<T> = a_src
.iter()
.zip(b_src.iter())
.map(|(&x, &y)| f(x, y))
.collect();
return Array::from_vec(a.dim().clone(), data);
}
let target_shape = broadcast_shapes(a.shape(), b.shape()).map_err(|_| {
FerrayError::shape_mismatch(format!(
"binary op: shapes {:?} and {:?} are not broadcast-compatible",
a.shape(),
b.shape()
))
})?;
let a_view = a.broadcast_to(&target_shape)?;
let b_view = b.broadcast_to(&target_shape)?;
let data: Vec<T> = a_view
.iter()
.zip(b_view.iter())
.map(|(&x, &y)| f(x, y))
.collect();
let result_dim = D::from_dim_slice(&target_shape).ok_or_else(|| {
FerrayError::shape_mismatch(format!(
"binary op: cannot represent broadcast result shape {target_shape:?} as the input dimension type"
))
})?;
Array::from_vec(result_dim, data)
}
/// Combines `a` and `b` element by element with `f`, where the result element type may
/// differ from the input element type. Shapes are broadcast when they differ.
///
/// Equal shapes take a fast path over contiguous data from [`contig_input`]. Otherwise
/// both arrays are broadcast to their common shape and iterated in lockstep.
///
/// # Errors
///
/// Returns a shape-mismatch error when the shapes are not broadcast-compatible or the
/// broadcast shape cannot be represented by `D`, and propagates errors from
/// `broadcast_to` and `Array::from_vec`.
#[inline]
pub fn binary_map_op<T, U, D>(
    a: &Array<T, D>,
    b: &Array<T, D>,
    f: impl Fn(T, T) -> U,
) -> FerrayResult<Array<U, D>>
where
    T: Element + Copy,
    U: Element,
    D: Dimension,
{
    // Fast path: identical shapes need no broadcasting.
    if a.shape() == b.shape() {
        let lhs = contig_input(a);
        let rhs = contig_input(b);
        let data: Vec<U> = lhs
            .iter()
            .zip(rhs.iter())
            .map(|(x, y)| f(*x, *y))
            .collect();
        return Array::from_vec(a.dim().clone(), data);
    }

    let target_shape = match broadcast_shapes(a.shape(), b.shape()) {
        Ok(shape) => shape,
        Err(_) => {
            return Err(FerrayError::shape_mismatch(format!(
                "binary op: shapes {:?} and {:?} are not broadcast-compatible",
                a.shape(),
                b.shape()
            )));
        }
    };

    let lhs_view = a.broadcast_to(&target_shape)?;
    let rhs_view = b.broadcast_to(&target_shape)?;
    let data: Vec<U> = lhs_view
        .iter()
        .zip(rhs_view.iter())
        .map(|(x, y)| f(*x, *y))
        .collect();

    // The dimension conversion happens after the element pass, as in the original order.
    let result_dim = match D::from_dim_slice(&target_shape) {
        Some(dim) => dim,
        None => {
            return Err(FerrayError::shape_mismatch(format!(
                "binary op: cannot represent broadcast result shape {target_shape:?} as the input dimension type"
            )));
        }
    };
    Array::from_vec(result_dim, data)
}
/// Combines `a` and `b` element by element with `f`, where the two inputs and the result
/// may all have different element types. Shapes are broadcast when they differ.
///
/// Equal shapes take a fast path over contiguous data from [`contig_input`]. Otherwise
/// both arrays are broadcast to their common shape and iterated in lockstep.
///
/// # Errors
///
/// Returns a shape-mismatch error when the shapes are not broadcast-compatible or the
/// broadcast shape cannot be represented by `D`, and propagates errors from
/// `broadcast_to` and `Array::from_vec`.
#[inline]
pub fn binary_mixed_op<T, U, V, D>(
    a: &Array<T, D>,
    b: &Array<U, D>,
    f: impl Fn(T, U) -> V,
) -> FerrayResult<Array<V, D>>
where
    T: Element + Copy,
    U: Element + Copy,
    V: Element,
    D: Dimension,
{
    // Fast path: identical shapes need no broadcasting.
    if a.shape() == b.shape() {
        let lhs = contig_input(a);
        let rhs = contig_input(b);
        let data: Vec<V> = lhs
            .iter()
            .zip(rhs.iter())
            .map(|(x, y)| f(*x, *y))
            .collect();
        return Array::from_vec(a.dim().clone(), data);
    }

    let target_shape = match broadcast_shapes(a.shape(), b.shape()) {
        Ok(shape) => shape,
        Err(_) => {
            return Err(FerrayError::shape_mismatch(format!(
                "binary mixed op: shapes {:?} and {:?} are not broadcast-compatible",
                a.shape(),
                b.shape()
            )));
        }
    };

    let lhs_view = a.broadcast_to(&target_shape)?;
    let rhs_view = b.broadcast_to(&target_shape)?;
    let data: Vec<V> = lhs_view
        .iter()
        .zip(rhs_view.iter())
        .map(|(x, y)| f(*x, *y))
        .collect();

    // The dimension conversion happens after the element pass, as in the original order.
    let result_dim = match D::from_dim_slice(&target_shape) {
        Some(dim) => dim,
        None => {
            return Err(FerrayError::shape_mismatch(format!(
                "binary mixed op: cannot represent broadcast result shape {target_shape:?} as the input dimension type"
            )));
        }
    };
    Array::from_vec(result_dim, data)
}
pub fn binary_broadcast_op<T, D1, D2>(
a: &Array<T, D1>,
b: &Array<T, D2>,
f: impl Fn(T, T) -> T,
) -> FerrayResult<Array<T, IxDyn>>
where
T: Element + Copy,
D1: Dimension,
D2: Dimension,
{
let shape = broadcast_shapes(a.shape(), b.shape())?;
let a_view = a.broadcast_to(&shape)?;
let b_view = b.broadcast_to(&shape)?;
let data: Vec<T> = a_view
.iter()
.zip(b_view.iter())
.map(|(&x, &y)| f(x, y))
.collect();
Array::from_vec(IxDyn::from(&shape[..]), data)
}
/// Combines two arrays of possibly different dimension types element by element with
/// `f`, where the result element type may differ from the input element type.
///
/// Both inputs are always broadcast to their common shape (there is no same-shape fast
/// path here) and the result is returned with a dynamic dimension.
///
/// # Errors
///
/// Propagates errors from `broadcast_shapes`, `broadcast_to` and `Array::from_vec`.
pub fn binary_broadcast_map_op<T, U, D1, D2>(
    a: &Array<T, D1>,
    b: &Array<T, D2>,
    f: impl Fn(T, T) -> U,
) -> FerrayResult<Array<U, IxDyn>>
where
    T: Element + Copy,
    U: Element,
    D1: Dimension,
    D2: Dimension,
{
    let shape = broadcast_shapes(a.shape(), b.shape())?;
    let lhs_view = a.broadcast_to(&shape)?;
    let rhs_view = b.broadcast_to(&shape)?;

    let data: Vec<U> = lhs_view
        .iter()
        .zip(rhs_view.iter())
        .map(|(x, y)| f(*x, *y))
        .collect();

    let out_dim = IxDyn::from(&shape[..]);
    Array::from_vec(out_dim, data)
}
/// Applies an `f32` function to every `half::f16` element of `input`.
///
/// Each element is widened with `to_f32`, passed through `f`, and narrowed back with
/// `half::f16::from_f32`. The result has the same dimension as `input`.
///
/// # Errors
///
/// Returns whatever error `Array::from_vec` reports.
#[cfg(feature = "f16")]
#[inline]
pub fn unary_f16_op<D>(
    input: &Array<half::f16, D>,
    f: impl Fn(f32) -> f32,
) -> FerrayResult<Array<half::f16, D>>
where
    D: Dimension,
{
    let through_f32 = |h: &half::f16| half::f16::from_f32(f(h.to_f32()));
    let data: Vec<half::f16> = input.iter().map(through_f32).collect();
    Array::from_vec(input.dim().clone(), data)
}
/// Evaluates an `f32` predicate on every `half::f16` element of `input`.
///
/// Each element is widened with `to_f32` before being passed to `f`. The resulting
/// `bool` array has the same dimension as `input`.
///
/// # Errors
///
/// Returns whatever error `Array::from_vec` reports.
#[cfg(feature = "f16")]
#[inline]
pub fn unary_f16_to_bool_op<D>(
    input: &Array<half::f16, D>,
    f: impl Fn(f32) -> bool,
) -> FerrayResult<Array<bool, D>>
where
    D: Dimension,
{
    let flags: Vec<bool> = input
        .iter()
        .map(|h| f(h.to_f32()))
        .collect();
    Array::from_vec(input.dim().clone(), flags)
}
/// Applies a binary `f32` function to pairs of `half::f16` elements.
///
/// Both operands are widened with `to_f32`, combined by `f`, and the result is narrowed
/// back with `half::f16::from_f32`. Shape handling and errors are those of
/// [`binary_elementwise_op`], to which this forwards.
#[cfg(feature = "f16")]
#[inline]
pub fn binary_f16_op<D>(
    a: &Array<half::f16, D>,
    b: &Array<half::f16, D>,
    f: impl Fn(f32, f32) -> f32,
) -> FerrayResult<Array<half::f16, D>>
where
    D: Dimension,
{
    let through_f32 = |x: half::f16, y: half::f16| {
        let wide = f(x.to_f32(), y.to_f32());
        half::f16::from_f32(wide)
    };
    binary_elementwise_op(a, b, through_f32)
}
// Generates a public unary function over `half::f16` arrays.
//
// `unary_f16_fn!(#[attrs] name, f32_fn)` expands to
// `pub fn name<D>(input: &Array<half::f16, D>) -> FerrayResult<Array<half::f16, D>>`,
// whose body forwards to `$crate::helpers::unary_f16_op(input, f32_fn)`.
// Any attributes (including doc comments) written before the name are copied onto the
// generated function.
//
// NOTE(review): the macro is not gated on the `f16` feature while `unary_f16_op` is —
// presumably every invocation site is gated; confirm.
macro_rules! unary_f16_fn {
(
$(#[$attr:meta])*
$name:ident,
$f32_fn:expr
) => {
$(#[$attr])*
pub fn $name<D>(
input: &::ferray_core::Array<half::f16, D>,
) -> ::ferray_core::error::FerrayResult<
::ferray_core::Array<half::f16, D>,
>
where
D: ::ferray_core::dimension::Dimension,
{
// Fully qualified path so the expansion resolves from any module of this crate.
$crate::helpers::unary_f16_op(input, $f32_fn)
}
};
}
// Generates a public binary function over `half::f16` arrays.
//
// `binary_f16_fn!(#[attrs] name, f32_fn)` expands to
// `pub fn name<D>(a: &Array<half::f16, D>, b: &Array<half::f16, D>)
//     -> FerrayResult<Array<half::f16, D>>`,
// whose body forwards to `$crate::helpers::binary_f16_op(a, b, f32_fn)`.
// Any attributes (including doc comments) written before the name are copied onto the
// generated function.
//
// NOTE(review): the macro is not gated on the `f16` feature while `binary_f16_op` is —
// presumably every invocation site is gated; confirm.
macro_rules! binary_f16_fn {
(
$(#[$attr:meta])*
$name:ident,
$f32_fn:expr
) => {
$(#[$attr])*
pub fn $name<D>(
a: &::ferray_core::Array<half::f16, D>,
b: &::ferray_core::Array<half::f16, D>,
) -> ::ferray_core::error::FerrayResult<
::ferray_core::Array<half::f16, D>,
>
where
D: ::ferray_core::dimension::Dimension,
{
// Fully qualified path so the expansion resolves from any module of this crate.
$crate::helpers::binary_f16_op(a, b, $f32_fn)
}
};
}
pub(crate) use binary_f16_fn;
pub(crate) use unary_f16_fn;
/// Evaluates a binary `f32` predicate on pairs of `half::f16` elements.
///
/// Both operands are widened with `to_f32` before being passed to `f`. Shape handling
/// and errors are those of [`binary_map_op`], to which this forwards.
#[cfg(feature = "f16")]
#[inline]
pub fn binary_f16_to_bool_op<D>(
    a: &Array<half::f16, D>,
    b: &Array<half::f16, D>,
    f: impl Fn(f32, f32) -> bool,
) -> FerrayResult<Array<bool, D>>
where
    D: Dimension,
{
    let widened_predicate = |x: half::f16, y: half::f16| f(x.to_f32(), y.to_f32());
    binary_map_op(a, b, widened_predicate)
}
/// Verifies that the output array of an `*_into` operation has exactly `input_shape`.
///
/// # Errors
///
/// Returns a shape-mismatch error naming `op_name` and both shapes when they differ.
#[inline]
fn check_into_shapes<U: Element, D2: Dimension>(
    out: &Array<U, D2>,
    input_shape: &[usize],
    op_name: &str,
) -> FerrayResult<()> {
    let out_shape = out.shape();
    if out_shape == input_shape {
        return Ok(());
    }
    Err(FerrayError::shape_mismatch(format!(
        "{op_name}_into: out shape {:?} does not match input shape {:?}",
        out_shape, input_shape
    )))
}
/// Applies `f` to every element of `input`, writing the results into the existing `out`.
///
/// `op_name` is only used to build error messages.
///
/// # Errors
///
/// Returns a shape-mismatch error when `out` and `input` have different shapes, and an
/// invalid-value error when either array cannot be viewed as a contiguous slice.
#[inline]
pub fn unary_float_op_into<T, D>(
    input: &Array<T, D>,
    out: &mut Array<T, D>,
    op_name: &str,
    f: impl Fn(T) -> T,
) -> FerrayResult<()>
where
    T: Element + Copy,
    D: Dimension,
{
    check_into_shapes::<T, D>(out, input.shape(), op_name)?;

    let strided_input = || {
        FerrayError::invalid_value(format!(
            "{op_name}_into: input must be contiguous (C-order); call {op_name}() for strided arrays"
        ))
    };
    let strided_output = || {
        FerrayError::invalid_value(format!(
            "{op_name}_into: out must be contiguous (C-order); call {op_name}() for strided output"
        ))
    };

    let in_slice = input.as_slice().ok_or_else(strided_input)?;
    let out_slice = out.as_slice_mut().ok_or_else(strided_output)?;

    out_slice
        .iter_mut()
        .zip(in_slice)
        .for_each(|(slot, &value)| *slot = f(value));
    Ok(())
}
/// Applies `f` to every element of `input`, writing the results into the existing `out`
/// and using rayon once the input holds at least `THRESHOLD_COMPUTE_BOUND` elements.
///
/// `op_name` is only used to build error messages.
///
/// # Errors
///
/// Returns a shape-mismatch error when `out` and `input` have different shapes, and an
/// invalid-value error when either array cannot be viewed as a contiguous slice.
#[inline]
pub fn unary_float_op_into_compute<T, D>(
    input: &Array<T, D>,
    out: &mut Array<T, D>,
    op_name: &str,
    f: impl Fn(T) -> T + Sync + Send,
) -> FerrayResult<()>
where
    T: Element + Copy,
    D: Dimension,
{
    check_into_shapes::<T, D>(out, input.shape(), op_name)?;

    let strided_input = || {
        FerrayError::invalid_value(format!(
            "{op_name}_into: input must be contiguous (C-order); call {op_name}() for strided arrays"
        ))
    };
    let strided_output = || {
        FerrayError::invalid_value(format!(
            "{op_name}_into: out must be contiguous (C-order); call {op_name}() for strided output"
        ))
    };

    let in_slice = input.as_slice().ok_or_else(strided_input)?;
    let out_slice = out.as_slice_mut().ok_or_else(strided_output)?;

    parallel_unary_fill_threshold(in_slice, out_slice, THRESHOLD_COMPUTE_BOUND, f);
    Ok(())
}
/// Combines `a` and `b` element by element with `f`, writing the results into the
/// existing `out`. Broadcasting is not supported: all three arrays must share one shape.
///
/// `op_name` is only used to build error messages.
///
/// # Errors
///
/// Returns a shape-mismatch error when `a` and `b` differ in shape or when `out` does not
/// match them, and an invalid-value error when any of the three arrays cannot be viewed
/// as a contiguous slice.
#[inline]
pub fn binary_elementwise_op_into<T, D>(
    a: &Array<T, D>,
    b: &Array<T, D>,
    out: &mut Array<T, D>,
    op_name: &str,
    f: impl Fn(T, T) -> T,
) -> FerrayResult<()>
where
    T: Element + Copy,
    D: Dimension,
{
    let (a_shape, b_shape) = (a.shape(), b.shape());
    if a_shape != b_shape {
        return Err(FerrayError::shape_mismatch(format!(
            "{op_name}_into: input shapes {:?} and {:?} differ (broadcasting not supported; use {op_name}() instead)",
            a_shape, b_shape
        )));
    }
    check_into_shapes::<T, D>(out, a_shape, op_name)?;

    // Contiguity is checked in the order a, b, out so the first offending operand is
    // the one reported.
    let a_slice = a.as_slice().ok_or_else(|| {
        FerrayError::invalid_value(format!("{op_name}_into: a must be contiguous (C-order)"))
    })?;
    let b_slice = b.as_slice().ok_or_else(|| {
        FerrayError::invalid_value(format!("{op_name}_into: b must be contiguous (C-order)"))
    })?;
    let out_slice = out.as_slice_mut().ok_or_else(|| {
        FerrayError::invalid_value(format!("{op_name}_into: out must be contiguous (C-order)"))
    })?;

    out_slice
        .iter_mut()
        .zip(a_slice.iter().zip(b_slice))
        .for_each(|(slot, (&x, &y))| *slot = f(x, y));
    Ok(())
}
// Unit tests for the element-wise helpers: same-shape and broadcast paths, error cases,
// and handling of Fortran-order (non-C-contiguous) inputs.
#[cfg(test)]
mod tests {
use super::*;
use ferray_core::dimension::{Ix1, Ix2};
// `unary_float_op` applies the function to each element of a 1-D array.
#[test]
fn unary_op_works() {
let a = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![1.0, 4.0, 9.0]).unwrap();
let r = unary_float_op(&a, f64::sqrt).unwrap();
assert_eq!(r.as_slice().unwrap(), &[1.0, 2.0, 3.0]);
}
// Same-shape inputs are combined pairwise.
#[test]
fn binary_op_same_shape() {
let a = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![1.0, 2.0, 3.0]).unwrap();
let b = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![4.0, 5.0, 6.0]).unwrap();
let r = binary_elementwise_op(&a, &b, |x, y| x + y).unwrap();
assert_eq!(r.as_slice().unwrap(), &[5.0, 7.0, 9.0]);
}
// Lengths 3 and 2 cannot be broadcast together, so an error is expected.
#[test]
fn binary_op_shape_mismatch() {
let a = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![1.0, 2.0, 3.0]).unwrap();
let b = Array::<f64, Ix1>::from_vec(Ix1::new([2]), vec![4.0, 5.0]).unwrap();
assert!(binary_elementwise_op(&a, &b, |x, y| x + y).is_err());
}
// Cross-rank broadcasting: shape [2, 1] with shape [3] gives shape [2, 3].
#[test]
fn binary_broadcast_works() {
let a = Array::<f64, Ix2>::from_vec(Ix2::new([2, 1]), vec![1.0, 2.0]).unwrap();
let b = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![10.0, 20.0, 30.0]).unwrap();
let r = binary_broadcast_op(&a, &b, |x, y| x + y).unwrap();
assert_eq!(r.shape(), &[2, 3]);
let s: Vec<f64> = r.iter().copied().collect();
assert_eq!(s, vec![11.0, 21.0, 31.0, 12.0, 22.0, 32.0]);
}
// Same-rank broadcasting in `binary_elementwise_op`: [3, 1] with [1, 4] gives [3, 4].
#[test]
fn binary_elementwise_op_broadcasts_within_same_rank() {
let a = Array::<f64, Ix2>::from_vec(Ix2::new([3, 1]), vec![1.0, 2.0, 3.0]).unwrap();
let b =
Array::<f64, Ix2>::from_vec(Ix2::new([1, 4]), vec![10.0, 20.0, 30.0, 40.0]).unwrap();
let r = binary_elementwise_op(&a, &b, |x, y| x + y).unwrap();
assert_eq!(r.shape(), &[3, 4]);
assert_eq!(
r.iter().copied().collect::<Vec<_>>(),
vec![
11.0, 21.0, 31.0, 41.0, 12.0, 22.0, 32.0, 42.0, 13.0, 23.0, 33.0, 43.0,
]
);
}
// Same-rank broadcasting in `binary_map_op` with a `bool` result type.
#[test]
fn binary_map_op_broadcasts_within_same_rank() {
let a = Array::<i32, Ix2>::from_vec(Ix2::new([2, 1]), vec![1, 5]).unwrap();
let b = Array::<i32, Ix2>::from_vec(Ix2::new([1, 3]), vec![3, 5, 7]).unwrap();
let r = binary_map_op(&a, &b, |x, y| x < y).unwrap();
assert_eq!(r.shape(), &[2, 3]);
assert_eq!(
r.iter().copied().collect::<Vec<_>>(),
vec![true, true, true, false, false, true]
);
}
// Mixed element types (f64 with i32) combined under broadcasting.
#[test]
fn binary_mixed_op_broadcasts() {
let a = Array::<f64, Ix2>::from_vec(Ix2::new([2, 1]), vec![1.0, 4.0]).unwrap();
let b = Array::<i32, Ix2>::from_vec(Ix2::new([1, 3]), vec![1, 2, 3]).unwrap();
let r = binary_mixed_op(&a, &b, |x, n| x * f64::from(1 << n)).unwrap();
assert_eq!(r.shape(), &[2, 3]);
assert_eq!(
r.iter().copied().collect::<Vec<_>>(),
vec![2.0, 4.0, 8.0, 8.0, 16.0, 32.0]
);
}
// Lengths 3 and 4 are incompatible for both the element-wise and the map variant.
#[test]
fn binary_op_incompatible_shapes_error() {
let a = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![1.0, 2.0, 3.0]).unwrap();
let b = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
assert!(binary_elementwise_op(&a, &b, |x, y| x + y).is_err());
assert!(binary_map_op(&a, &b, |x, y| x == y).is_err());
}
// A C-order array must be borrowed by `contig_input`, not copied.
#[test]
fn contig_input_borrows_for_c_order() {
let a = Array::<f64, Ix2>::from_vec(Ix2::new([2, 3]), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
.unwrap();
let cow = contig_input(&a);
match cow {
std::borrow::Cow::Borrowed(_) => {}
std::borrow::Cow::Owned(_) => panic!("expected borrow for C-contig array"),
}
}
// A Fortran-order array has no contiguous slice, so `contig_input` must produce an
// owned copy whose elements follow the logical iteration order.
#[test]
fn contig_input_materializes_for_fortran_order() {
let f = Array::<f64, Ix2>::from_vec_f(Ix2::new([2, 3]), vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0])
.unwrap();
// Sanity check: logical iteration yields 1..=6 even though storage is column-major.
assert_eq!(
f.iter().copied().collect::<Vec<_>>(),
vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
);
assert!(f.as_slice().is_none());
let cow = contig_input(&f);
match cow {
std::borrow::Cow::Owned(v) => {
assert_eq!(v, vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
}
std::borrow::Cow::Borrowed(_) => {
panic!("expected materialized owned Vec for Fortran-order input")
}
}
}
// `unary_float_op` on a Fortran-order input returns results in logical order.
#[test]
fn unary_float_op_works_on_fortran_layout() {
let f = Array::<f64, Ix2>::from_vec_f(Ix2::new([2, 3]), vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0])
.unwrap();
let r = unary_float_op(&f, f64::sqrt).unwrap();
let s = r.as_slice().unwrap();
let expected: Vec<f64> = [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0]
.iter()
.map(|x| x.sqrt())
.collect();
assert_eq!(s, expected.as_slice());
}
// `unary_map_op` on a Fortran-order input returns results in logical order.
#[test]
fn unary_map_op_works_on_fortran_layout() {
let f = Array::<f64, Ix2>::from_vec_f(Ix2::new([2, 3]), vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0])
.unwrap();
let r = unary_map_op(&f, |x| x as i32).unwrap();
let s = r.as_slice().unwrap();
assert_eq!(s, &[1, 2, 3, 4, 5, 6]);
}
// Fortran-order left operand combined with a C-order right operand of the same shape.
#[test]
fn binary_elementwise_op_handles_fortran_lhs() {
let a = Array::<f64, Ix2>::from_vec_f(Ix2::new([2, 3]), vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0])
.unwrap();
let b =
Array::<f64, Ix2>::from_vec(Ix2::new([2, 3]), vec![10.0, 20.0, 30.0, 40.0, 50.0, 60.0])
.unwrap();
let r = binary_elementwise_op(&a, &b, |x, y| x + y).unwrap();
assert_eq!(r.as_slice().unwrap(), &[11.0, 22.0, 33.0, 44.0, 55.0, 66.0]);
}
// Both operands in Fortran order.
#[test]
fn binary_elementwise_op_handles_two_fortran_inputs() {
let a = Array::<f64, Ix2>::from_vec_f(Ix2::new([2, 3]), vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0])
.unwrap();
let b = Array::<f64, Ix2>::from_vec_f(
Ix2::new([2, 3]),
vec![10.0, 40.0, 20.0, 50.0, 30.0, 60.0],
)
.unwrap();
let r = binary_elementwise_op(&a, &b, |x, y| x + y).unwrap();
assert_eq!(r.as_slice().unwrap(), &[11.0, 22.0, 33.0, 44.0, 55.0, 66.0]);
}
// `binary_map_op` with a Fortran-order left operand and a `bool` result.
#[test]
fn binary_map_op_handles_fortran_layout() {
let a = Array::<f64, Ix2>::from_vec_f(Ix2::new([2, 3]), vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0])
.unwrap();
let b = Array::<f64, Ix2>::from_vec(Ix2::new([2, 3]), vec![1.0, 1.0, 4.0, 5.0, 5.0, 5.0])
.unwrap();
let r = binary_map_op(&a, &b, |x, y| x > y).unwrap();
assert_eq!(
r.iter().copied().collect::<Vec<_>>(),
vec![false, true, false, false, false, true]
);
}
// The f64 kernel path must also accept Fortran-order input and emit logical order.
#[test]
fn try_simd_f64_unary_runs_on_fortran_layout() {
// Minimal stand-in kernel: doubles every element.
fn double_kernel(src: &[f64], dst: &mut [f64]) {
for (o, &x) in dst.iter_mut().zip(src.iter()) {
*o = x * 2.0;
}
}
let f = Array::<f64, Ix2>::from_vec_f(Ix2::new([2, 3]), vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0])
.unwrap();
let r = try_simd_f64_unary(&f, double_kernel)
.expect("try_simd_f64_unary should succeed for f64 input")
.unwrap();
assert_eq!(r.as_slice().unwrap(), &[2.0, 4.0, 6.0, 8.0, 10.0, 12.0]);
}
}