use crate::dtype::Float;
use crate::error::{FerrotorchError, FerrotorchResult};
use crate::shape::normalize_axis;
use crate::storage::TensorStorage;
use crate::tensor::Tensor;
/// Decompose `shape` around axis `dim` into `(outer, dim_size, inner)` so a
/// contiguous buffer can be walked as three nested loops: element `(o, i, k)`
/// lives at flat offset `o * dim_size * inner + i * inner + k`.
///
/// Panics if `dim >= shape.len()` (callers validate the axis first).
fn dim_strides(shape: &[usize], dim: usize) -> (usize, usize, usize) {
    let (lead, rest) = shape.split_at(dim);
    let outer = lead.iter().product::<usize>();
    let inner = rest[1..].iter().product::<usize>();
    (outer, rest[0], inner)
}
/// Reject scalar (0-D) tensors, then map a possibly-negative `dim` into
/// `0..ndim` via `normalize_axis`.
///
/// `op_name` tags the error message so callers can tell which op rejected
/// the tensor.
fn validate_dim(ndim: usize, dim: i64, op_name: &str) -> FerrotorchResult<usize> {
    match ndim {
        0 => Err(FerrotorchError::InvalidArgument {
            message: format!("{op_name}: cannot operate on a scalar (0-D) tensor"),
        }),
        _ => normalize_axis(dim as isize, ndim),
    }
}
/// Cumulative sum of `input` along `dim` (negative dims allowed).
///
/// CUDA tensors are staged through a CPU copy, scanned there, and the result
/// is moved back to the input's device before returning.
pub fn cumsum_forward<T: Float>(input: &Tensor<T>, dim: i64) -> FerrotorchResult<Tensor<T>> {
    let axis = validate_dim(input.ndim(), dim, "cumsum")?;
    // Work on a CPU copy so the scan can index the raw buffer directly.
    let cpu = if input.is_cuda() { input.cpu()? } else { input.clone() };
    let data = cpu.data()?;
    let shape = cpu.shape();
    let (outer, len, inner) = dim_strides(shape, axis);
    let mut scanned = vec![<T as num_traits::Zero>::zero(); data.len()];
    for o in 0..outer {
        for k in 0..inner {
            let base = o * len * inner + k;
            let mut running = <T as num_traits::Zero>::zero();
            for step in 0..len {
                let at = base + step * inner;
                running = running + data[at];
                scanned[at] = running;
            }
        }
    }
    let result = Tensor::from_storage(TensorStorage::cpu(scanned), shape.to_vec(), false)?;
    result.to(input.device())
}
/// Suffix-sum scan over a contiguous buffer: `out[i] = data[i] + data[i+1] + …`
/// along axis `dim` of `shape`, i.e. a cumulative sum taken from the end of
/// the axis backwards.
///
/// `data` must be a row-major contiguous buffer of the given `shape`;
/// `dim` must already be a valid (normalized) axis.
pub fn reverse_cumsum<T: Float>(data: &[T], shape: &[usize], dim: usize) -> Vec<T> {
    let (outer, len, inner) = dim_strides(shape, dim);
    let mut out = vec![<T as num_traits::Zero>::zero(); data.len()];
    for o in 0..outer {
        for k in 0..inner {
            let base = o * len * inner + k;
            // Walk the axis back-to-front, accumulating the tail sum.
            let mut tail = <T as num_traits::Zero>::zero();
            let mut step = len;
            while step > 0 {
                step -= 1;
                let at = base + step * inner;
                tail = tail + data[at];
                out[at] = tail;
            }
        }
    }
    out
}
/// Cumulative product of `input` along `dim` (negative dims allowed).
///
/// CUDA tensors are staged through a CPU copy, scanned there, and the result
/// is moved back to the input's device before returning.
pub fn cumprod_forward<T: Float>(input: &Tensor<T>, dim: i64) -> FerrotorchResult<Tensor<T>> {
    let axis = validate_dim(input.ndim(), dim, "cumprod")?;
    let cpu = if input.is_cuda() { input.cpu()? } else { input.clone() };
    let data = cpu.data()?;
    let shape = cpu.shape();
    let (outer, len, inner) = dim_strides(shape, axis);
    let mut scanned = vec![<T as num_traits::Zero>::zero(); data.len()];
    for o in 0..outer {
        for k in 0..inner {
            let base = o * len * inner + k;
            // Multiplicative scan: seed with one, fold each element in.
            let mut running = <T as num_traits::One>::one();
            for step in 0..len {
                let at = base + step * inner;
                running = running * data[at];
                scanned[at] = running;
            }
        }
    }
    let result = Tensor::from_storage(TensorStorage::cpu(scanned), shape.to_vec(), false)?;
    result.to(input.device())
}
/// Result of a cumulative-extreme scan (`cummax_forward` / `cummin_forward`):
/// the running extreme values plus, for every element, the index along the
/// scanned axis where the current extreme was found.
pub struct CumExtremeResult<T: Float> {
// Running extreme values, moved back to the original input's device.
pub values: Tensor<T>,
// Per-element axis indices of the extreme, in CPU row-major element order.
// NOTE(review): stays a host `Vec` even for CUDA inputs — confirm callers
// expect that.
pub indices: Vec<usize>,
}
/// Cumulative maximum of `input` along `dim` (negative dims allowed).
///
/// Returns the running maxima together with, for every element, the
/// along-axis index at which the current maximum occurred.
///
/// NaN handling: a NaN entry becomes (and stays) the running maximum from the
/// point it appears, matching `torch.cummax`. Previously `cur_max` was seeded
/// with -infinity and only updated via `>`, so NaN was never adopted and a
/// lane starting with NaN emitted -infinity — a value not present in the
/// input — with index 0.
pub fn cummax_forward<T: Float>(
    input: &Tensor<T>,
    dim: i64,
) -> FerrotorchResult<CumExtremeResult<T>> {
    let norm_dim = validate_dim(input.ndim(), dim, "cummax")?;
    let input_cpu = if input.is_cuda() { input.cpu()? } else { input.clone() };
    let in_data = input_cpu.data()?;
    let shape = input_cpu.shape();
    let (outer, dim_size, inner) = dim_strides(shape, norm_dim);
    let numel = in_data.len();
    let mut out_vals = vec![<T as num_traits::Zero>::zero(); numel];
    let mut out_idxs = vec![0usize; numel];
    for o in 0..outer {
        for k in 0..inner {
            let base = o * dim_size * inner + k;
            let mut cur_max = <T as num_traits::Float>::neg_infinity();
            let mut cur_idx = 0usize;
            for i in 0..dim_size {
                let idx = base + i * inner;
                let v = in_data[idx];
                // Seed with the first element (so a leading NaN is kept
                // instead of the -inf sentinel), and let NaN win any later
                // comparison so it propagates down the axis. Once cur_max
                // is NaN, `v > NaN` is false for every v, so it sticks.
                if i == 0 || v > cur_max || <T as num_traits::Float>::is_nan(v) {
                    cur_max = v;
                    cur_idx = i;
                }
                out_vals[idx] = cur_max;
                out_idxs[idx] = cur_idx;
            }
        }
    }
    let values = Tensor::from_storage(TensorStorage::cpu(out_vals), shape.to_vec(), false)?;
    let values = values.to(input.device())?;
    Ok(CumExtremeResult {
        values,
        indices: out_idxs,
    })
}
/// Cumulative minimum of `input` along `dim` (negative dims allowed).
///
/// Returns the running minima together with, for every element, the
/// along-axis index at which the current minimum occurred.
///
/// NaN handling: a NaN entry becomes (and stays) the running minimum from the
/// point it appears, matching `torch.cummin`. Previously `cur_min` was seeded
/// with +infinity and only updated via `<`, so NaN was never adopted and a
/// lane starting with NaN emitted +infinity — a value not present in the
/// input — with index 0.
pub fn cummin_forward<T: Float>(
    input: &Tensor<T>,
    dim: i64,
) -> FerrotorchResult<CumExtremeResult<T>> {
    let norm_dim = validate_dim(input.ndim(), dim, "cummin")?;
    let input_cpu = if input.is_cuda() { input.cpu()? } else { input.clone() };
    let in_data = input_cpu.data()?;
    let shape = input_cpu.shape();
    let (outer, dim_size, inner) = dim_strides(shape, norm_dim);
    let numel = in_data.len();
    let mut out_vals = vec![<T as num_traits::Zero>::zero(); numel];
    let mut out_idxs = vec![0usize; numel];
    for o in 0..outer {
        for k in 0..inner {
            let base = o * dim_size * inner + k;
            let mut cur_min = <T as num_traits::Float>::infinity();
            let mut cur_idx = 0usize;
            for i in 0..dim_size {
                let idx = base + i * inner;
                let v = in_data[idx];
                // Seed with the first element (so a leading NaN is kept
                // instead of the +inf sentinel), and let NaN win any later
                // comparison so it propagates down the axis. Once cur_min
                // is NaN, `v < NaN` is false for every v, so it sticks.
                if i == 0 || v < cur_min || <T as num_traits::Float>::is_nan(v) {
                    cur_min = v;
                    cur_idx = i;
                }
                out_vals[idx] = cur_min;
                out_idxs[idx] = cur_idx;
            }
        }
    }
    let values = Tensor::from_storage(TensorStorage::cpu(out_vals), shape.to_vec(), false)?;
    let values = values.to(input.device())?;
    Ok(CumExtremeResult {
        values,
        indices: out_idxs,
    })
}
/// Numerically stable log-cumsum-exp of `input` along `dim` (negative dims
/// allowed): `out[i] = log(sum_{j<=i} exp(in[j]))`.
///
/// Uses a running-max rescaling so exponentials stay in `[0, 1]` and neither
/// overflow nor underflow for large-magnitude inputs.
///
/// Fix: when every element of a lane prefix is -infinity, the running max `m`
/// is -infinity and the naive `exp(x - m)` evaluates `(-inf) - (-inf) = NaN`,
/// poisoning the rest of the lane. The correct value for such a prefix is
/// `log(0) = -inf` (as in `torch.logcumsumexp`), which is now emitted
/// directly.
pub fn logcumsumexp_forward<T: Float>(
    input: &Tensor<T>,
    dim: i64,
) -> FerrotorchResult<Tensor<T>> {
    let norm_dim = validate_dim(input.ndim(), dim, "logcumsumexp")?;
    let input_cpu = if input.is_cuda() { input.cpu()? } else { input.clone() };
    let in_data = input_cpu.data()?;
    let shape = input_cpu.shape();
    let (outer, dim_size, inner) = dim_strides(shape, norm_dim);
    let mut out = vec![<T as num_traits::Zero>::zero(); in_data.len()];
    let neg_inf = <T as num_traits::Float>::neg_infinity();
    for o in 0..outer {
        for k in 0..inner {
            let base = o * dim_size * inner + k;
            // First pass: prefix maxima, used as the rescaling reference.
            let mut running_max = neg_inf;
            let mut maxes = Vec::with_capacity(dim_size);
            for i in 0..dim_size {
                let idx = base + i * inner;
                if in_data[idx] > running_max {
                    running_max = in_data[idx];
                }
                maxes.push(running_max);
            }
            // Second pass: acc = sum_{j<=i} exp(in[j] - maxes[i]).
            let mut acc = <T as num_traits::Zero>::zero();
            let mut prev_max = neg_inf;
            for i in 0..dim_size {
                let idx = base + i * inner;
                let m = maxes[i];
                if m == neg_inf {
                    // Every element up to i is -inf: the cumulative sum of
                    // exps is exactly zero, so its log is -inf. Evaluating
                    // exp(-inf - -inf) here would inject NaN instead.
                    out[idx] = neg_inf;
                    continue;
                }
                // The running max grew: rescale the accumulated sum from the
                // old reference point to the new one. (Skip when prev_max is
                // still -inf: acc is 0 and there is nothing to rescale.)
                if m > prev_max && prev_max != neg_inf {
                    acc = acc * (prev_max - m).exp();
                }
                acc = acc + (in_data[idx] - m).exp();
                out[idx] = m + acc.ln();
                prev_max = m;
            }
        }
    }
    let result = Tensor::from_storage(TensorStorage::cpu(out), shape.to_vec(), false)?;
    result.to(input.device())
}