use std::rc::Rc;
use rand::{distributions::Uniform, Rng, SeedableRng};
use zyx::{DType, Scalar, Tensor, ZyxError};
/// Differential fuzz test: builds the same random chain of unary operations on
/// zyx `Tensor`s and on the `CPUTensor` reference implementation from this file,
/// realizes the zyx tensors and compares both results element by element.
///
/// The RNG is seeded with a fixed value, so every run draws the same shapes,
/// data and operations.
///
/// # Errors
/// Propagates any `ZyxError` returned by tensor creation, reshape, realization
/// or conversion back to `Vec<f32>`.
///
/// # Panics
/// Panics when the two implementations disagree on length or on any element.
// NOTE(review): nothing in this file calls `fuzzy`, which is presumably why the
// lint is silenced — confirm, or register the function as a `#[test]`.
#[allow(unused)]
fn fuzzy() -> Result<(), ZyxError> {
    let rand_seed = 21847091824098071;
    let max_tensors = 5;
    let max_numel = 256 * 256;
    let max_dims = 3;
    let num_nodes = 14;
    let mut rng = rand::rngs::SmallRng::seed_from_u64(rand_seed);

    // At least one tensor is required: the op loop below samples an index from
    // `0..num_t`, which would be an empty range (and panic) for `num_t == 0`.
    let num_t = rng.gen_range(1..=max_tensors);
    let mut tensors = Vec::new();
    let mut cpu_tensors = Vec::new();
    for _ in 0..num_t {
        let mut shape = Vec::new();
        for i in 0..rng.gen_range(1..max_dims) {
            // The first dimension gets a fixed cap; every later dimension is
            // limited by what is left of the `max_numel` budget.
            let n = if i > 0 {
                max_numel / shape.iter().product::<usize>()
            } else {
                max_numel / 10
            };
            if n > 1 {
                shape.insert(0, rng.gen_range(1..n));
            } else {
                break;
            }
        }
        let numel = shape.iter().product::<usize>();
        let r = Uniform::new(-100., 100.);
        let data: Vec<f32> = (0..numel).map(|_| rng.sample(&r)).collect();
        tensors.push(Tensor::from(&data).reshape(&shape)?);
        cpu_tensors.push(CPUTensor::new(&data).reshape(&shape));
    }

    // Apply `num_nodes` random unary ops, each to a randomly chosen tensor,
    // mirroring every op on the CPU reference.
    for _ in 0..num_nodes {
        let x = rng.gen_range(0..num_t);
        match rng.gen_range(0..10) {
            0 => {
                tensors[x] = tensors[x].relu();
                cpu_tensors[x] = cpu_tensors[x].relu();
            }
            1 => {
                tensors[x] = -&tensors[x];
                cpu_tensors[x] = cpu_tensors[x].neg();
            }
            2 => {
                tensors[x] = tensors[x].exp2();
                cpu_tensors[x] = cpu_tensors[x].exp2();
            }
            3 => {
                tensors[x] = tensors[x].log2();
                cpu_tensors[x] = cpu_tensors[x].log2();
            }
            4 => {
                tensors[x] = tensors[x].inv();
                cpu_tensors[x] = cpu_tensors[x].inv();
            }
            5 => {
                tensors[x] = tensors[x].sqrt();
                cpu_tensors[x] = cpu_tensors[x].sqrt();
            }
            6 => {
                tensors[x] = tensors[x].sin();
                cpu_tensors[x] = cpu_tensors[x].sin();
            }
            7 => {
                tensors[x] = tensors[x].cos();
                cpu_tensors[x] = cpu_tensors[x].cos();
            }
            8 => {
                tensors[x] = !&tensors[x];
                cpu_tensors[x] = cpu_tensors[x].not();
            }
            9 => {
                tensors[x] = tensors[x].nonzero();
                cpu_tensors[x] = cpu_tensors[x].nonzero();
            }
            // Never drawn from `0..10`; kept as a no-op slot.
            10 => {}
            _ => unreachable!(),
        }
    }

    Tensor::plot_graph([], "fuzzy_graph");
    Tensor::realize(&tensors)?;

    for (tensor_id, (tensor, cpu_tensor)) in tensors.iter().zip(cpu_tensors).enumerate() {
        let data: Vec<f32> = tensor.clone().try_into()?;
        let cpu_data: Vec<f32> = cpu_tensor.to_vec();
        // `zip` below stops at the shorter side, so check the lengths explicitly.
        assert_eq!(
            data.len(),
            cpu_data.len(),
            "Comparing tensor {tensor_id}, element counts differ"
        );
        for (id, (x, y)) in data.iter().zip(cpu_data.iter()).enumerate() {
            // NaN never compares equal to itself, yet ops such as `log2` or
            // `sqrt` on negative inputs yield NaN on both sides; accept that.
            assert!(
                x == y || (x.is_nan() && y.is_nan()),
                "Comparing tensor {tensor_id}, element {id}, x != y at {x} != {y}"
            );
        }
    }
    Ok(())
}
/// CPU-side reference tensor used to cross-check results in this file.
///
/// Pairs a `View` (the logical layout) with the element storage in `Data`.
#[derive(Clone)]
pub struct CPUTensor {
// Logical layout through which `data` is read.
view: View,
// Element storage; the enum variant records the element type.
data: Data,
}
// Expands to a new `CPUTensor` whose storage is `$f` mapped over every stored
// element of `$tensor` (whatever the storage variant) and whose view is a
// clone of `$tensor`'s view.
macro_rules! unary_op {
    ($tensor: expr, $f: expr) => {{
        let data = match &$tensor.data {
            Data::F32(buf) => Data::F32(unary(buf, $f)),
            Data::F64(buf) => Data::F64(unary(buf, $f)),
            Data::I32(buf) => Data::I32(unary(buf, $f)),
        };
        CPUTensor {
            view: $tensor.view.clone(),
            data,
        }
    }};
}
// Expands to a new `CPUTensor` holding `$op` applied pairwise to the elements
// of `$x` and `$y`, each read through its own view.
//
// `binary` writes its output densely in logical iteration order, so the result
// gets a fresh contiguous view over `$x`'s shape. Reusing `$x`'s view would
// re-apply its permutation / expansion / padding to data that is already laid
// out logically.
//
// Panics when the two operands use different storage variants.
macro_rules! binary_op {
    ($x: expr, $y: expr, $op: expr) => {{
        CPUTensor {
            view: View::new(&$x.shape()),
            data: match &$x.data {
                Data::F32(xdata) => {
                    let Data::F32(ydata) = &$y.data else {
                        panic!("binary op: dtype mismatch, expected F32 rhs")
                    };
                    Data::F32(binary(&$x.view, xdata, &$y.view, ydata, $op))
                }
                Data::F64(xdata) => {
                    let Data::F64(ydata) = &$y.data else {
                        panic!("binary op: dtype mismatch, expected F64 rhs")
                    };
                    Data::F64(binary(&$x.view, xdata, &$y.view, ydata, $op))
                }
                Data::I32(xdata) => {
                    let Data::I32(ydata) = &$y.data else {
                        panic!("binary op: dtype mismatch, expected I32 rhs")
                    };
                    Data::I32(binary(&$x.view, xdata, &$y.view, ydata, $op))
                }
            },
        }
    }};
}
impl CPUTensor {
    /// Builds a one-dimensional tensor with `data.len()` elements from a copy
    /// of `data`.
    ///
    /// Elements are converted with `Scalar::cast` into the storage variant
    /// selected by `T::dtype()`, so the slice is never reinterpreted through
    /// `unsafe` code.
    ///
    /// # Panics
    /// Reaches `todo!()` for every dtype other than `F32`, `F64` and `I32`.
    pub fn new<T: Scalar>(data: &[T]) -> CPUTensor {
        let view = View::new(&[data.len()]);
        let data = match T::dtype() {
            #[cfg(feature = "half")]
            DType::BF16 => todo!(),
            #[cfg(feature = "half")]
            DType::F16 => todo!(),
            DType::F32 => Data::F32(data.iter().cloned().map(Scalar::cast).collect()),
            DType::F64 => Data::F64(data.iter().cloned().map(Scalar::cast).collect()),
            #[cfg(feature = "complex")]
            DType::CF32 => todo!(),
            #[cfg(feature = "complex")]
            DType::CF64 => todo!(),
            DType::U8 => todo!(),
            DType::I8 => todo!(),
            DType::I16 => todo!(),
            DType::I32 => Data::I32(data.iter().cloned().map(Scalar::cast).collect()),
            DType::I64 => todo!(),
            DType::Bool => todo!(),
        };
        CPUTensor { view, data }
    }

    /// Returns the tensor's elements in logical (view) order as a `Vec<T>`.
    ///
    /// Each stored element is converted with `Scalar::cast`. Transmuting the
    /// collected `Vec` instead would be unsound whenever `T` differs from the
    /// stored element type, and `Vec`'s layout is not guaranteed anyway.
    // NOTE(review): assumes `Scalar::cast` to the same type is the identity — confirm.
    pub fn to_vec<T: Scalar>(&self) -> Vec<T> {
        let numel = self.view.numel();
        match &self.data {
            Data::F32(data) => self
                .view
                .iterate_padded(data)
                .take(numel)
                .map(Scalar::cast)
                .collect(),
            Data::F64(data) => self
                .view
                .iterate_padded(data)
                .take(numel)
                .map(Scalar::cast)
                .collect(),
            Data::I32(data) => self
                .view
                .iterate_padded(data)
                .take(numel)
                .map(Scalar::cast)
                .collect(),
        }
    }

    /// Returns the logical shape reported by the view.
    pub fn shape(&self) -> Vec<usize> {
        self.view.shape()
    }

    /// Converts every stored element to `dtype` with `Scalar::cast`, keeping
    /// the view.
    ///
    /// # Panics
    /// Reaches `todo!()` for target dtypes other than `F32`, `F64` and `I32`.
    pub fn cast(&self, dtype: DType) -> CPUTensor {
        let data = match &self.data {
            Data::F32(data) => match dtype {
                DType::F32 => Data::F32(unary(data, Scalar::cast)),
                DType::F64 => Data::F64(unary(data, Scalar::cast)),
                DType::I32 => Data::I32(unary(data, Scalar::cast)),
                _ => todo!(),
            },
            Data::F64(data) => match dtype {
                DType::F32 => Data::F32(unary(data, Scalar::cast)),
                DType::F64 => Data::F64(unary(data, Scalar::cast)),
                DType::I32 => Data::I32(unary(data, Scalar::cast)),
                _ => todo!(),
            },
            Data::I32(data) => match dtype {
                DType::F32 => Data::F32(unary(data, Scalar::cast)),
                DType::F64 => Data::F64(unary(data, Scalar::cast)),
                DType::I32 => Data::I32(unary(data, Scalar::cast)),
                _ => todo!(),
            },
        };
        CPUTensor { view: self.view.clone(), data }
    }

    // Unary operations: each applies the named `Scalar` function to every
    // stored element through `unary_op!`.

    /// Element-wise `Scalar::relu`.
    pub fn relu(&self) -> CPUTensor {
        unary_op!(self, Scalar::relu)
    }

    /// Element-wise `Scalar::neg`.
    pub fn neg(&self) -> CPUTensor {
        unary_op!(self, Scalar::neg)
    }

    /// Element-wise `Scalar::exp2`.
    pub fn exp2(&self) -> CPUTensor {
        unary_op!(self, Scalar::exp2)
    }

    /// Element-wise `Scalar::log2`.
    pub fn log2(&self) -> CPUTensor {
        unary_op!(self, Scalar::log2)
    }

    /// Element-wise `Scalar::inv`.
    pub fn inv(&self) -> CPUTensor {
        unary_op!(self, Scalar::inv)
    }

    /// Element-wise `Scalar::sqrt`.
    pub fn sqrt(&self) -> CPUTensor {
        unary_op!(self, Scalar::sqrt)
    }

    /// Element-wise `Scalar::sin`.
    pub fn sin(&self) -> CPUTensor {
        unary_op!(self, Scalar::sin)
    }

    /// Element-wise `Scalar::cos`.
    pub fn cos(&self) -> CPUTensor {
        unary_op!(self, Scalar::cos)
    }

    /// Element-wise `Scalar::not`.
    pub fn not(&self) -> CPUTensor {
        unary_op!(self, Scalar::not)
    }

    /// Element-wise `Scalar::nonzero`.
    pub fn nonzero(&self) -> CPUTensor {
        unary_op!(self, Scalar::nonzero)
    }

    // Binary operations: each pairs up the elements of `self` and `other`
    // through `binary_op!`, which panics if the storage variants differ.

    /// Pairwise `Scalar::add`.
    pub fn add(&self, other: &CPUTensor) -> CPUTensor {
        binary_op!(self, other, Scalar::add)
    }

    /// Pairwise `Scalar::sub`.
    pub fn sub(&self, other: &CPUTensor) -> CPUTensor {
        binary_op!(self, other, Scalar::sub)
    }

    /// Pairwise `Scalar::mul`.
    pub fn mul(&self, other: &CPUTensor) -> CPUTensor {
        binary_op!(self, other, Scalar::mul)
    }

    /// Pairwise `Scalar::div`.
    pub fn div(&self, other: &CPUTensor) -> CPUTensor {
        binary_op!(self, other, Scalar::div)
    }

    /// Pairwise `Scalar::pow`.
    pub fn pow(&self, other: &CPUTensor) -> CPUTensor {
        binary_op!(self, other, Scalar::pow)
    }

    /// Pairwise `Scalar::cmplt`.
    pub fn cmplt(&self, other: &CPUTensor) -> CPUTensor {
        binary_op!(self, other, Scalar::cmplt)
    }

    /// Pairwise `Scalar::cmpgt`.
    pub fn cmpgt(&self, other: &CPUTensor) -> CPUTensor {
        binary_op!(self, other, Scalar::cmpgt)
    }

    /// Pairwise `Scalar::max`.
    pub fn max(&self, other: &CPUTensor) -> CPUTensor {
        binary_op!(self, other, Scalar::max)
    }

    /// Pairwise `Scalar::or`.
    pub fn or(&self, other: &CPUTensor) -> CPUTensor {
        binary_op!(self, other, Scalar::or)
    }

    /// Sums over `axes`, keeping each reduced axis as a dimension of size 1.
    ///
    /// The result's view is built from the reduced shape so that it matches
    /// the number of elements `reduce_op` produces.
    ///
    /// # Panics
    /// Reaches `todo!()` for storage other than `F32`.
    pub fn reduce_sum_kd(&self, axes: &[usize]) -> CPUTensor {
        let res_shape = self.shape().reduce(axes);
        CPUTensor {
            data: match &self.data {
                Data::F32(data) => Data::F32(reduce_op(&self.view, data, axes, &res_shape, true)),
                _ => todo!(),
            },
            view: View::new(&res_shape),
        }
    }

    /// Takes the maximum over `axes`, keeping each reduced axis as a dimension
    /// of size 1.
    ///
    /// The result's view is built from the reduced shape so that it matches
    /// the number of elements `reduce_op` produces.
    ///
    /// # Panics
    /// Reaches `todo!()` for storage other than `F32`.
    pub fn reduce_max_kd(&self, axes: &[usize]) -> CPUTensor {
        let res_shape = self.shape().reduce(axes);
        CPUTensor {
            data: match &self.data {
                Data::F32(data) => Data::F32(reduce_op(&self.view, data, axes, &res_shape, false)),
                _ => todo!(),
            },
            view: View::new(&res_shape),
        }
    }

    // Movement operations: only the view changes; the storage handle is cloned.

    /// Returns a tensor over the same storage whose view is `View::pad(padding)`.
    pub fn pad(&self, padding: &[(isize, isize)]) -> CPUTensor {
        CPUTensor {
            view: self.view.pad(padding),
            data: self.data.clone(),
        }
    }

    /// Returns a tensor over the same storage whose view is `View::permute(axes)`.
    pub fn permute(&self, axes: &[usize]) -> CPUTensor {
        CPUTensor {
            view: self.view.permute(axes),
            data: self.data.clone(),
        }
    }

    /// Returns a tensor over the same storage whose view is `View::expand(shape)`.
    pub fn expand(&self, shape: &[usize]) -> CPUTensor {
        CPUTensor {
            view: self.view.expand(shape),
            data: self.data.clone(),
        }
    }

    /// Returns a tensor over the same storage whose view is `View::reshape(shape)`.
    pub fn reshape(&self, shape: &[usize]) -> CPUTensor {
        CPUTensor {
            view: self.view.reshape(shape),
            data: self.data.clone(),
        }
    }
}
/// Element storage for a `CPUTensor`, one variant per supported element type.
///
/// Each variant holds an `Rc` slice, so cloning a `Data` clones the handle
/// rather than the elements.
#[derive(Debug, Clone)]
enum Data {
// 32-bit float elements.
F32(Rc<[f32]>),
// 64-bit float elements.
F64(Rc<[f64]>),
// 32-bit signed integer elements.
I32(Rc<[i32]>),
}
/// Applies `op` to a clone of every element of `data`, in order, and returns
/// the results as a freshly allocated `Rc` slice.
fn unary<T: Scalar + Sync + Send, T2: Scalar + Send>(
    data: &[T],
    op: impl Fn(T) -> T2 + Sync + Send,
) -> Rc<[T2]> {
    let mut mapped = Vec::with_capacity(data.len());
    for item in data {
        mapped.push(op(item.clone()));
    }
    Rc::from(mapped)
}
/// Walks `xdata` through `xview` and `ydata` through `yview` in lockstep,
/// applies `op` to each pair and collects the results into an `Rc` slice.
///
/// Iteration stops as soon as either side is exhausted.
fn binary<XT: Scalar + Sync + Send, YT: Scalar + Sync + Send, T2: Scalar + Send>(
    xview: &View,
    xdata: &[XT],
    yview: &View,
    ydata: &[YT],
    op: impl Fn(XT, YT) -> T2 + Sync + Send,
) -> Rc<[T2]> {
    let lhs = xview.iterate_padded(xdata);
    let rhs = yview.iterate_padded(ydata);
    let mut combined = Vec::new();
    for (a, b) in lhs.zip(rhs) {
        combined.push(op(a, b));
    }
    combined.into()
}
/// Reduces `data` (read through `view`) over `axes` into a buffer shaped like
/// `res_shape`.
///
/// With `sum_reduce` set, the accumulator starts at `T::zero()` and elements
/// are combined with `Scalar::add`; otherwise it starts at `T::min_value()`
/// and elements are combined with `Scalar::max`.
fn reduce_op<T: Scalar>(
    view: &View,
    data: &[T],
    axes: &[usize],
    res_shape: &[usize],
    sum_reduce: bool,
) -> Rc<[T]> {
    let in_shape = view.shape();
    let in_strides = in_shape.strides();
    let out_strides = res_shape.strides();
    // Dimensions that survive the reduction; only these contribute to the
    // output offset.
    let kept: Vec<usize> = (0..in_shape.len()).filter(|d| !axes.contains(d)).collect();

    let init = if sum_reduce { T::zero() } else { T::min_value() };
    let mut acc: Vec<T> = vec![init; res_shape.iter().product()];

    for (linear, value) in view.iterate_padded(data).enumerate() {
        // Decompose the linear input index per kept dimension and rebuild the
        // offset using the output strides.
        let mut slot = 0;
        for &dim in &kept {
            slot += ((linear / in_strides[dim]) % in_shape[dim]) * out_strides[dim];
        }
        acc[slot] = if sum_reduce {
            Scalar::add(acc[slot].clone(), value)
        } else {
            Scalar::max(acc[slot].clone(), value)
        };
    }
    acc.into()
}
/// Logical layout of a tensor, stored as a list of `InnerView`s.
///
/// `shape()` reports the shape of the first entry; element lookup walks the
/// entries in order, feeding the offset computed by one into the next.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct View {
// Layout stages; index 0 is the outermost (the one whose shape is reported).
views: Vec<InnerView>,
}
/// One layout stage of a `View`: per-dimension sizes, strides and padding.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
struct InnerView {
// Size of each dimension.
shape: Vec<usize>,
// Multiplier applied to each dimension's index when computing an offset.
strides: Vec<usize>,
// (left, right) padding per dimension; values may be negative.
padding: Vec<(isize, isize)>,
}
impl InnerView {
    /// True when the strides are exactly the ones `Shape::strides` derives
    /// from the shape and no dimension carries padding.
    #[must_use]
    fn is_contiguous(&self) -> bool {
        if self.is_padded() {
            return false;
        }
        self.strides == self.shape.strides()
    }

    /// True when at least one dimension has a non-zero left or right padding.
    #[must_use]
    fn is_padded(&self) -> bool {
        !self.padding.iter().all(|&(left, right)| left == 0 && right == 0)
    }
}
/// Iterator that yields the elements of `data` in the logical order described
/// by `view`.
pub struct CPUPaddedIter<'a, T> {
// Underlying element storage.
data: &'a [T],
// Layout used to translate a logical index into an offset into `data`.
view: &'a View,
// Next logical index to yield.
idx: usize,
// Last logical index to yield (inclusive); iteration ends once `idx` exceeds it.
num_iters: usize,
}
impl<'a, T: Scalar> Iterator for CPUPaddedIter<'a, T> {
    type Item = T;

    /// Yields the element at the next logical index, or `T::zero()` when that
    /// index falls into a padded region of any layout stage.
    ///
    /// Returns `None` once `idx` has moved past `num_iters`.
    fn next(&mut self) -> Option<Self::Item> {
        if self.idx > self.num_iters {
            return None;
        }
        let mut idx = self.idx;
        self.idx += 1;
        // Each stage turns the current index into an offset, which becomes the
        // index fed to the next stage.
        for InnerView {
            shape,
            strides,
            padding,
        } in &self.view.views
        {
            let mut res = 0;
            // Peel dimensions off from the innermost one.
            for ((d, st), (lp, rp)) in shape.iter().zip(strides).zip(padding.iter()).rev() {
                let mut dim_idx = idx % d;
                if *lp > 0 {
                    let lpu = *lp as usize;
                    if dim_idx < lpu {
                        return Some(T::zero());
                    }
                    dim_idx -= lpu;
                } else if *lp < 0 {
                    // Negative left padding crops: skip the removed prefix.
                    dim_idx += (-*lp) as usize;
                }
                if *rp > 0 {
                    // `d` already includes the padding, so the extent that is
                    // backed by real data is `d - lp - rp`. Anything at or past
                    // it lies in the right padding.
                    let extent = *d as isize - *lp - *rp;
                    if dim_idx as isize >= extent {
                        return Some(T::zero());
                    }
                }
                res += dim_idx * st;
                idx /= d;
            }
            idx = res;
        }
        Some(self.data[idx].clone())
    }
}
impl View {
    /// Creates a single-stage view with the given shape, the strides derived
    /// by `Shape::strides` and zero padding on every dimension.
    ///
    /// `padding` gets one `(0, 0)` entry per dimension: element lookup zips
    /// shape, strides and padding together, and `pad` / `permute` index into
    /// it, so it must be as long as the shape.
    pub fn new(shape: &[usize]) -> View {
        let shape: Vec<usize> = shape.into();
        let strides = shape.strides();
        let padding = vec![(0, 0); shape.len()];
        View { views: vec![InnerView { shape, strides, padding }] }
    }

    /// True when every stage is contiguous (see `InnerView::is_contiguous`).
    #[must_use]
    pub fn is_contiguous(&self) -> bool {
        self.views.iter().all(InnerView::is_contiguous)
    }

    /// Returns a copy of the first stage's shape.
    ///
    /// # Panics
    /// Panics if the view holds no stages.
    #[must_use]
    pub fn shape(&self) -> Vec<usize> {
        self.views.first().unwrap().shape.clone()
    }

    /// Number of logical elements: the product of `shape()`.
    #[must_use]
    pub fn numel(&self) -> usize {
        self.shape().iter().product()
    }

    /// Returns an iterator over `data` in the logical order of this view,
    /// yielding exactly `numel()` items.
    ///
    /// The iterator's end index is inclusive (`numel - 1`); `saturating_sub`
    /// plus `take(numel)` keeps a zero-element view from underflowing and
    /// makes it yield nothing.
    #[must_use]
    pub fn iterate_padded<'a, T: Scalar>(&'a self, data: &'a [T]) -> impl Iterator<Item = T> + 'a {
        let numel = self.numel();
        CPUPaddedIter {
            data,
            view: self,
            idx: 0,
            num_iters: numel.saturating_sub(1),
        }
        .take(numel)
    }

    /// Returns a view whose first stage has shape `shape`, with strides
    /// recomputed by `Shape::expand_strides` and `(0, 0)` padding prepended
    /// for any newly added leading dimensions.
    #[must_use]
    pub fn expand(&self, shape: &[usize]) -> Self {
        let mut views = self.views.clone();
        views[0].strides = views[0]
            .shape
            .expand_strides(shape, views[0].strides.clone());
        views[0].shape = shape.into();
        let n = shape.len() - views[0].padding.len();
        views[0].padding = core::iter::repeat((0, 0))
            .take(n)
            .chain(views[0].padding.iter().copied())
            .collect();
        Self { views }
    }

    /// Returns a view with `new_padding` added to the first stage.
    ///
    /// `new_padding` is ordered from the last dimension backwards: entry `i`
    /// applies to the `i`-th dimension counted from the end. Each affected
    /// dimension grows by `left + right`.
    ///
    /// # Panics
    /// Panics on arithmetic underflow if `new_padding` has more entries than
    /// the stage has padding entries.
    #[must_use]
    pub fn pad(&self, new_padding: &[(isize, isize)]) -> Self {
        let mut views = self.views.clone();
        if let Some(InnerView {
            shape,
            strides: _,
            padding,
        }) = views.first_mut()
        {
            for (i, d) in shape.iter_mut().rev().enumerate() {
                if let Some((left, right)) = new_padding.get(i) {
                    *d = (*d as isize + left + right) as usize;
                } else {
                    break;
                }
            }
            // Leading dimensions not mentioned in `new_padding` get (0, 0);
            // the rest is `new_padding` flipped into dimension order, then
            // added onto the padding already present.
            let n = padding.len() - new_padding.len();
            *padding = core::iter::repeat(&(0, 0))
                .take(n)
                .chain(new_padding.iter().rev())
                .zip(padding.iter())
                .map(|(x, y)| (x.0 + y.0, x.1 + y.1))
                .collect();
        }
        Self { views }
    }

    /// Returns a view whose first stage has its shape, strides and padding
    /// reordered so that new dimension `i` is old dimension `axes[i]`.
    #[must_use]
    pub fn permute(&self, axes: &[usize]) -> Self {
        let mut views = self.views.clone();
        views[0].shape = views[0].shape.permute(axes);
        views[0].strides = views[0].strides.permute(axes);
        let padding = &views[0].padding;
        let padding = axes.iter().map(|axis| padding[*axis]).collect();
        views[0].padding = padding;
        Self { views }
    }

    /// Returns a view with logical shape `n_shape`.
    ///
    /// * Same shape: returns a clone.
    /// * First stage contiguous: that stage is replaced by a fresh contiguous
    ///   stage of shape `n_shape`.
    /// * Otherwise, if `n_shape` only adds size-1 dimensions: strides and
    ///   padding entries for those dimensions are inserted into the first stage.
    /// * Otherwise: a new contiguous stage of shape `n_shape` is pushed in
    ///   front of the existing ones.
    ///
    /// In debug builds, asserts that `n_shape` has the same number of elements
    /// as the current view.
    #[must_use]
    pub fn reshape(&self, n_shape: &[usize]) -> Self {
        if n_shape == self.shape() {
            return self.clone();
        }
        debug_assert_eq!(
            n_shape.numel(),
            self.numel(),
            "Can't reshape {:?} to {:?}",
            self.shape(),
            n_shape
        );
        let mut views = self.views.clone();
        if views.first().unwrap().is_contiguous() {
            views[0] = InnerView {
                shape: n_shape.into(),
                strides: n_shape.strides(),
                padding: core::iter::repeat((0, 0)).take(n_shape.len()).collect(),
            };
        } else {
            let shape = self.shape();
            if n_shape.len() > shape.len()
                && n_shape
                    .iter()
                    .filter(|d| **d != 1)
                    .zip(shape.iter())
                    .all(|(nd, d)| nd == d)
            {
                if let Some(InnerView {
                    shape,
                    strides,
                    padding,
                }) = views.first_mut()
                {
                    *shape = n_shape.into();
                    let mut n_strides: Vec<usize> = strides.clone().into();
                    let mut n_padding = padding.to_vec();
                    // Walk the new shape from the back and splice in an entry
                    // for every size-1 dimension.
                    for (i, d) in n_shape.iter().rev().enumerate() {
                        if *d == 1 {
                            n_strides.insert(
                                n_strides.len() - i,
                                if i == 0 {
                                    1
                                } else {
                                    n_strides[n_strides.len() - i]
                                },
                            );
                            n_padding.insert(n_padding.len() - i, (0, 0));
                        }
                    }
                    *strides = n_strides.into();
                    *padding = n_padding;
                }
            } else {
                views.insert(
                    0,
                    InnerView {
                        shape: n_shape.into(),
                        strides: n_shape.strides(),
                        padding: core::iter::repeat((0, 0)).take(n_shape.len()).collect(),
                    },
                );
            }
        }
        Self { views }
    }
}
/// Helper operations on a list of dimension sizes.
trait Shape {
    /// Borrows the dimension sizes as a slice.
    fn as_slice(&self) -> &[usize];

    /// Strides where the last dimension has stride 1 and each earlier one has
    /// the product of all dimension sizes that follow it.
    fn strides(&self) -> Vec<usize> {
        let dims = self.as_slice();
        let mut out = vec![0usize; dims.len()];
        let mut running: usize = 1;
        // Fill from the innermost dimension outwards.
        for (slot, extent) in out.iter_mut().zip(dims).rev() {
            *slot = running;
            running *= extent;
        }
        out
    }

    /// Strides for viewing this shape as `shape`.
    ///
    /// This shape is left-padded with 1s (and `old_strides` with 0s) up to the
    /// length of `shape`; a dimension keeps its old stride when its size
    /// equals the target size and gets stride 0 otherwise.
    fn expand_strides(&self, shape: &[usize], old_strides: Vec<usize>) -> Vec<usize> {
        let dims = self.as_slice();
        let missing = shape.len().saturating_sub(dims.len());
        let padded_dims = core::iter::repeat(1usize)
            .take(missing)
            .chain(dims.iter().copied());
        let padded_strides = core::iter::repeat(0usize)
            .take(missing)
            .chain(old_strides.iter().copied());
        let mut out = Vec::new();
        for ((have, want), stride) in padded_dims.zip(shape).zip(padded_strides) {
            out.push(if have == *want { stride } else { 0 });
        }
        out
    }

    /// Reorders the dimensions so that entry `i` of the result is
    /// dimension `axes[i]`.
    fn permute(&self, axes: &[usize]) -> Vec<usize> {
        let dims = self.as_slice();
        let mut out = Vec::with_capacity(axes.len());
        for &axis in axes {
            out.push(dims[axis]);
        }
        out
    }

    /// Product of all dimension sizes.
    fn numel(&self) -> usize {
        self.as_slice().iter().copied().product()
    }

    /// Copy of the shape with every dimension listed in `axes` set to 1.
    fn reduce(&self, axes: &[usize]) -> Vec<usize> {
        let mut out = self.as_slice().to_vec();
        axes.iter().for_each(|&axis| out[axis] = 1);
        out
    }
}
/// Lets an owned dimension list be used as a `Shape`.
impl Shape for Vec<usize> {
    /// Borrows the whole vector as a slice.
    fn as_slice(&self) -> &[usize] {
        &self[..]
    }
}
/// Lets a borrowed dimension slice be used as a `Shape`.
impl Shape for &[usize] {
    /// Returns the wrapped slice itself.
    fn as_slice(&self) -> &[usize] {
        *self
    }
}