use crate::{FloatElement, Tensor};
use torsh_core::error::{Result, TorshError};
use torsh_core::TensorElement;
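// Convolution, correlation, and filtering kernels for floating-point tensors.
// Everything below is a naive nested-loop CPU implementation that favours
// clarity and correctness over speed.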
impl<T: FloatElement> Tensor<T> {
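    /// 1-D convolution over a `(batch, in_channels, length)` input with a
    /// `(out_channels, in_channels / groups, kernel_size)` weight.
    ///
    /// The output length follows the usual convolution formula
    /// `(length + 2 * padding - dilation * (kernel_size - 1) - 1) / stride + 1`;
    /// e.g. length 10 with kernel 3, stride 1, padding 0, dilation 1 gives 8.
    ///
    /// A minimal sketch, assuming `input` and `weight` already exist with the
    /// shapes above:
    ///
    /// ```ignore
    /// // input: (1, 1, 10), weight: (1, 1, 3)
    /// let out = input.conv1d(&weight, None, 1, 0, 1, 1)?;
    /// assert_eq!(out.shape().dims(), &[1, 1, 8]);
    /// ```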
pub fn conv1d(
&self,
weight: &Self,
bias: Option<&Self>,
stride: usize,
padding: usize,
dilation: usize,
groups: usize,
) -> Result<Self> {
let input_shape_obj = self.shape();
let input_shape = input_shape_obj.dims();
let weight_shape_obj = weight.shape();
let weight_shape = weight_shape_obj.dims();
if input_shape.len() != 3 {
return Err(TorshError::InvalidArgument(format!(
"Expected 3D input tensor for conv1d, got {}D",
input_shape.len()
)));
}
if weight_shape.len() != 3 {
return Err(TorshError::InvalidArgument(format!(
"Expected 3D weight tensor for conv1d, got {}D",
weight_shape.len()
)));
}
let batch_size = input_shape[0];
let in_channels = input_shape[1];
let input_length = input_shape[2];
let out_channels = weight_shape[0];
let kernel_size = weight_shape[2];
if in_channels % groups != 0 || out_channels % groups != 0 {
return Err(TorshError::InvalidArgument(
"in_channels and out_channels must be divisible by groups".to_string(),
));
}
if weight_shape[1] != in_channels / groups {
return Err(TorshError::InvalidArgument(format!(
"Weight tensor has wrong number of input channels: expected {}, got {}",
in_channels / groups,
weight_shape[1]
)));
}
        let effective_kernel = (kernel_size - 1) * dilation + 1;
        let padded_length = input_length + 2 * padding;
        // Guard against a division by zero and a usize underflow in the
        // output-size math below, which would otherwise panic instead of
        // returning a proper error.
        if stride == 0 {
            return Err(TorshError::InvalidArgument(
                "stride must be greater than 0".to_string(),
            ));
        }
        if padded_length < effective_kernel {
            return Err(TorshError::InvalidArgument(format!(
                "Effective kernel size {} exceeds padded input length {}",
                effective_kernel, padded_length
            )));
        }
        let output_length = (padded_length - effective_kernel) / stride + 1;
let mut output_data =
vec![<T as TensorElement>::zero(); batch_size * out_channels * output_length];
for n in 0..batch_size {
for g in 0..groups {
let out_ch_start = g * (out_channels / groups);
let out_ch_end = (g + 1) * (out_channels / groups);
let in_ch_start = g * (in_channels / groups);
let in_ch_end = (g + 1) * (in_channels / groups);
for oc in out_ch_start..out_ch_end {
for ol in 0..output_length {
let mut sum = <T as TensorElement>::zero();
for ic in in_ch_start..in_ch_end {
let ic_rel = ic - in_ch_start;
for k in 0..kernel_size {
let il = (ol * stride + k * dilation) as i32 - padding as i32;
if il >= 0 && (il as usize) < input_length {
let input_idx = n * in_channels * input_length
+ ic * input_length
+ il as usize;
let weight_idx = oc * (in_channels / groups) * kernel_size
+ ic_rel * kernel_size
+ k;
let input_val = self.storage.get(input_idx)?;
let weight_val = weight.storage.get(weight_idx)?;
sum = sum + input_val * weight_val;
}
}
}
let output_idx = n * out_channels * output_length + oc * output_length + ol;
output_data[output_idx] = sum;
}
}
}
}
let mut output = Tensor::from_data(
output_data,
vec![batch_size, out_channels, output_length],
self.device(),
)?;
if let Some(b) = bias {
if b.shape().dims() != [out_channels] {
return Err(TorshError::InvalidArgument(format!(
"Bias must have shape [{}], got {:?}",
out_channels,
b.shape().dims()
)));
}
let bias_data = b.to_vec()?;
let mut output_data = output.to_vec()?;
for n in 0..batch_size {
#[allow(clippy::needless_range_loop)]
for oc in 0..out_channels {
for ol in 0..output_length {
let idx = n * out_channels * output_length + oc * output_length + ol;
output_data[idx] = output_data[idx] + bias_data[oc];
}
}
}
output = Tensor::from_data(
output_data,
vec![batch_size, out_channels, output_length],
self.device(),
)?;
}
        if self.requires_grad
            || weight.requires_grad
            || bias.map_or(false, |b| b.requires_grad)
        {
use std::sync::Arc;
output.requires_grad = true;
output.operation = crate::Operation::Custom(
"conv1d".to_string(),
vec![
Arc::downgrade(&Arc::new(self.clone())),
Arc::downgrade(&Arc::new(weight.clone())),
],
);
}
Ok(output)
}
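    /// 2-D convolution over a `(batch, in_channels, height, width)` input with
    /// a `(out_channels, in_channels / groups, kernel_h, kernel_w)` weight.
    ///
    /// `stride`, `padding`, and `dilation` are `(height, width)` pairs; each
    /// spatial output size follows
    /// `(size + 2 * padding - dilation * (kernel - 1) - 1) / stride + 1`.
    ///
    /// A minimal sketch, assuming `input` is `(1, 3, 8, 8)` and `weight` is
    /// `(16, 3, 3, 3)`:
    ///
    /// ```ignore
    /// let out = input.conv2d(&weight, None, (1, 1), (1, 1), (1, 1), 1)?;
    /// assert_eq!(out.shape().dims(), &[1, 16, 8, 8]); // 3x3 kernel with "same" padding
    /// ```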
pub fn conv2d(
&self,
weight: &Self,
bias: Option<&Self>,
stride: (usize, usize),
padding: (usize, usize),
dilation: (usize, usize),
groups: usize,
) -> Result<Self> {
let input_shape_obj = self.shape();
let input_shape = input_shape_obj.dims();
let weight_shape_obj = weight.shape();
let weight_shape = weight_shape_obj.dims();
if input_shape.len() != 4 {
return Err(TorshError::InvalidArgument(format!(
"Expected 4D input tensor for conv2d, got {}D",
input_shape.len()
)));
}
if weight_shape.len() != 4 {
return Err(TorshError::InvalidArgument(format!(
"Expected 4D weight tensor for conv2d, got {}D",
weight_shape.len()
)));
}
let batch_size = input_shape[0];
let in_channels = input_shape[1];
let input_height = input_shape[2];
let input_width = input_shape[3];
let out_channels = weight_shape[0];
let kernel_height = weight_shape[2];
let kernel_width = weight_shape[3];
if in_channels % groups != 0 || out_channels % groups != 0 {
return Err(TorshError::InvalidArgument(
"in_channels and out_channels must be divisible by groups".to_string(),
));
}
if weight_shape[1] != in_channels / groups {
return Err(TorshError::InvalidArgument(format!(
"Weight tensor has wrong number of input channels: expected {}, got {}",
in_channels / groups,
weight_shape[1]
)));
}
        let effective_kernel_h = (kernel_height - 1) * dilation.0 + 1;
        let effective_kernel_w = (kernel_width - 1) * dilation.1 + 1;
        let padded_height = input_height + 2 * padding.0;
        let padded_width = input_width + 2 * padding.1;
        // Guard against division by zero / usize underflow in the output-size math.
        if stride.0 == 0 || stride.1 == 0 {
            return Err(TorshError::InvalidArgument(
                "stride components must be greater than 0".to_string(),
            ));
        }
        if padded_height < effective_kernel_h || padded_width < effective_kernel_w {
            return Err(TorshError::InvalidArgument(format!(
                "Effective kernel ({}, {}) exceeds padded input ({}, {})",
                effective_kernel_h, effective_kernel_w, padded_height, padded_width
            )));
        }
        let output_height = (padded_height - effective_kernel_h) / stride.0 + 1;
        let output_width = (padded_width - effective_kernel_w) / stride.1 + 1;
let mut output_data = vec![
<T as TensorElement>::zero();
batch_size * out_channels * output_height * output_width
];
let self_data = self.to_vec()?;
let weight_data = weight.to_vec()?;
for n in 0..batch_size {
for g in 0..groups {
let out_ch_start = g * (out_channels / groups);
let out_ch_end = (g + 1) * (out_channels / groups);
let in_ch_start = g * (in_channels / groups);
let in_ch_end = (g + 1) * (in_channels / groups);
for oc in out_ch_start..out_ch_end {
for oh in 0..output_height {
for ow in 0..output_width {
let mut sum = <T as TensorElement>::zero();
for ic in in_ch_start..in_ch_end {
let ic_rel = ic - in_ch_start;
for kh in 0..kernel_height {
for kw in 0..kernel_width {
let ih = (oh * stride.0 + kh * dilation.0) as i32
- padding.0 as i32;
let iw = (ow * stride.1 + kw * dilation.1) as i32
- padding.1 as i32;
if ih >= 0
&& (ih as usize) < input_height
&& iw >= 0
&& (iw as usize) < input_width
{
let input_idx =
n * in_channels * input_height * input_width
+ ic * input_height * input_width
+ ih as usize * input_width
+ iw as usize;
let weight_idx = oc
* (in_channels / groups)
* kernel_height
* kernel_width
+ ic_rel * kernel_height * kernel_width
+ kh * kernel_width
+ kw;
sum = sum
+ self_data[input_idx] * weight_data[weight_idx];
}
}
}
}
let output_idx = n * out_channels * output_height * output_width
+ oc * output_height * output_width
+ oh * output_width
+ ow;
output_data[output_idx] = sum;
}
}
}
}
}
let mut output = Tensor::from_data(
output_data,
vec![batch_size, out_channels, output_height, output_width],
self.device(),
)?;
if let Some(b) = bias {
if b.shape().dims() != [out_channels] {
return Err(TorshError::InvalidArgument(format!(
"Bias must have shape [{}], got {:?}",
out_channels,
b.shape().dims()
)));
}
let bias_data = b.to_vec()?;
let mut output_data = output.to_vec()?;
for n in 0..batch_size {
#[allow(clippy::needless_range_loop)]
for oc in 0..out_channels {
for oh in 0..output_height {
for ow in 0..output_width {
let idx = n * out_channels * output_height * output_width
+ oc * output_height * output_width
+ oh * output_width
+ ow;
output_data[idx] = output_data[idx] + bias_data[oc];
}
}
}
}
output = Tensor::from_data(
output_data,
vec![batch_size, out_channels, output_height, output_width],
self.device(),
)?;
}
        if self.requires_grad
            || weight.requires_grad
            || bias.map_or(false, |b| b.requires_grad)
        {
use std::sync::Arc;
output.requires_grad = true;
output.operation = crate::Operation::Custom(
"conv2d".to_string(),
vec![
Arc::downgrade(&Arc::new(self.clone())),
Arc::downgrade(&Arc::new(weight.clone())),
],
);
}
Ok(output)
}
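    /// 3-D convolution over a `(batch, in_channels, depth, height, width)`
    /// input with a `(out_channels, in_channels / groups, kernel_d, kernel_h, kernel_w)`
    /// weight; `stride`, `padding`, and `dilation` are `(depth, height, width)` triples.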
pub fn conv3d(
&self,
weight: &Self,
bias: Option<&Self>,
stride: (usize, usize, usize),
padding: (usize, usize, usize),
dilation: (usize, usize, usize),
groups: usize,
) -> Result<Self> {
let input_shape_obj = self.shape();
let input_shape = input_shape_obj.dims();
let weight_shape_obj = weight.shape();
let weight_shape = weight_shape_obj.dims();
if input_shape.len() != 5 {
return Err(TorshError::InvalidArgument(format!(
"Expected 5D input tensor for conv3d, got {}D",
input_shape.len()
)));
}
if weight_shape.len() != 5 {
return Err(TorshError::InvalidArgument(format!(
"Expected 5D weight tensor for conv3d, got {}D",
weight_shape.len()
)));
}
let batch_size = input_shape[0];
let in_channels = input_shape[1];
let input_depth = input_shape[2];
let input_height = input_shape[3];
let input_width = input_shape[4];
let out_channels = weight_shape[0];
let kernel_depth = weight_shape[2];
let kernel_height = weight_shape[3];
let kernel_width = weight_shape[4];
if in_channels % groups != 0 || out_channels % groups != 0 {
return Err(TorshError::InvalidArgument(
"in_channels and out_channels must be divisible by groups".to_string(),
));
}
if weight_shape[1] != in_channels / groups {
return Err(TorshError::InvalidArgument(format!(
"Weight tensor has wrong number of input channels: expected {}, got {}",
in_channels / groups,
weight_shape[1]
)));
}
        let effective_kernel_d = (kernel_depth - 1) * dilation.0 + 1;
        let effective_kernel_h = (kernel_height - 1) * dilation.1 + 1;
        let effective_kernel_w = (kernel_width - 1) * dilation.2 + 1;
        let padded_depth = input_depth + 2 * padding.0;
        let padded_height = input_height + 2 * padding.1;
        let padded_width = input_width + 2 * padding.2;
        // Guard against division by zero / usize underflow in the output-size math.
        if stride.0 == 0 || stride.1 == 0 || stride.2 == 0 {
            return Err(TorshError::InvalidArgument(
                "stride components must be greater than 0".to_string(),
            ));
        }
        if padded_depth < effective_kernel_d
            || padded_height < effective_kernel_h
            || padded_width < effective_kernel_w
        {
            return Err(TorshError::InvalidArgument(format!(
                "Effective kernel ({}, {}, {}) exceeds padded input ({}, {}, {})",
                effective_kernel_d,
                effective_kernel_h,
                effective_kernel_w,
                padded_depth,
                padded_height,
                padded_width
            )));
        }
        let output_depth = (padded_depth - effective_kernel_d) / stride.0 + 1;
        let output_height = (padded_height - effective_kernel_h) / stride.1 + 1;
        let output_width = (padded_width - effective_kernel_w) / stride.2 + 1;
let output_size = batch_size * out_channels * output_depth * output_height * output_width;
let mut output_data = vec![<T as TensorElement>::zero(); output_size];
let self_data = self.to_vec()?;
let weight_data = weight.to_vec()?;
for n in 0..batch_size {
for g in 0..groups {
let out_ch_start = g * (out_channels / groups);
let out_ch_end = (g + 1) * (out_channels / groups);
let in_ch_start = g * (in_channels / groups);
let in_ch_end = (g + 1) * (in_channels / groups);
for oc in out_ch_start..out_ch_end {
for od in 0..output_depth {
for oh in 0..output_height {
for ow in 0..output_width {
let mut sum = <T as TensorElement>::zero();
for ic in in_ch_start..in_ch_end {
let ic_rel = ic - in_ch_start;
for kd in 0..kernel_depth {
for kh in 0..kernel_height {
for kw in 0..kernel_width {
let id = (od * stride.0 + kd * dilation.0) as i32
- padding.0 as i32;
let ih = (oh * stride.1 + kh * dilation.1) as i32
- padding.1 as i32;
let iw = (ow * stride.2 + kw * dilation.2) as i32
- padding.2 as i32;
if id >= 0
&& (id as usize) < input_depth
&& ih >= 0
&& (ih as usize) < input_height
&& iw >= 0
&& (iw as usize) < input_width
{
let input_idx = n
* in_channels
* input_depth
* input_height
* input_width
+ ic * input_depth
* input_height
* input_width
+ id as usize * input_height * input_width
+ ih as usize * input_width
+ iw as usize;
let weight_idx = oc
* (in_channels / groups)
* kernel_depth
* kernel_height
* kernel_width
+ ic_rel
* kernel_depth
* kernel_height
* kernel_width
+ kd * kernel_height * kernel_width
+ kh * kernel_width
+ kw;
sum = sum
+ self_data[input_idx]
* weight_data[weight_idx];
}
}
}
}
}
let output_idx =
n * out_channels * output_depth * output_height * output_width
+ oc * output_depth * output_height * output_width
+ od * output_height * output_width
+ oh * output_width
+ ow;
output_data[output_idx] = sum;
}
}
}
}
}
}
let mut output = Tensor::from_data(
output_data,
vec![
batch_size,
out_channels,
output_depth,
output_height,
output_width,
],
self.device(),
)?;
if let Some(b) = bias {
if b.shape().dims() != [out_channels] {
return Err(TorshError::InvalidArgument(format!(
"Bias must have shape [{}], got {:?}",
out_channels,
b.shape().dims()
)));
}
let bias_data = b.to_vec()?;
let mut output_data = output.to_vec()?;
for n in 0..batch_size {
#[allow(clippy::needless_range_loop)]
for oc in 0..out_channels {
for od in 0..output_depth {
for oh in 0..output_height {
for ow in 0..output_width {
let idx =
n * out_channels * output_depth * output_height * output_width
+ oc * output_depth * output_height * output_width
+ od * output_height * output_width
+ oh * output_width
+ ow;
output_data[idx] = output_data[idx] + bias_data[oc];
}
}
}
}
}
output = Tensor::from_data(
output_data,
vec![
batch_size,
out_channels,
output_depth,
output_height,
output_width,
],
self.device(),
)?;
}
        if self.requires_grad
            || weight.requires_grad
            || bias.map_or(false, |b| b.requires_grad)
        {
use std::sync::Arc;
output.requires_grad = true;
output.operation = crate::Operation::Custom(
"conv3d".to_string(),
vec![
Arc::downgrade(&Arc::new(self.clone())),
Arc::downgrade(&Arc::new(weight.clone())),
],
);
}
Ok(output)
}
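    /// Depthwise 2-D convolution: each input channel is filtered with its own
    /// single-channel kernel, so `weight` must be
    /// `(in_channels, 1, kernel_h, kernel_w)` and the output keeps
    /// `in_channels` channels. Equivalent to `conv2d` with `groups == in_channels`.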
pub fn depthwise_conv2d(
&self,
weight: &Self,
bias: Option<&Self>,
stride: (usize, usize),
padding: (usize, usize),
dilation: (usize, usize),
) -> Result<Self> {
let input_shape_obj = self.shape();
let input_shape = input_shape_obj.dims();
let weight_shape_obj = weight.shape();
let weight_shape = weight_shape_obj.dims();
if input_shape.len() != 4 {
return Err(TorshError::InvalidArgument(format!(
"Expected 4D input tensor for depthwise_conv2d, got {}D",
input_shape.len()
)));
}
if weight_shape.len() != 4 {
return Err(TorshError::InvalidArgument(format!(
"Expected 4D weight tensor for depthwise_conv2d, got {}D",
weight_shape.len()
)));
}
let batch_size = input_shape[0];
let in_channels = input_shape[1];
let input_height = input_shape[2];
let input_width = input_shape[3];
let kernel_height = weight_shape[2];
let kernel_width = weight_shape[3];
if weight_shape[0] != in_channels || weight_shape[1] != 1 {
return Err(TorshError::InvalidArgument(format!(
"Weight tensor must have shape ({}, 1, kernel_h, kernel_w), got ({}, {}, {}, {})",
in_channels, weight_shape[0], weight_shape[1], weight_shape[2], weight_shape[3]
)));
}
        let effective_kernel_h = (kernel_height - 1) * dilation.0 + 1;
        let effective_kernel_w = (kernel_width - 1) * dilation.1 + 1;
        let padded_height = input_height + 2 * padding.0;
        let padded_width = input_width + 2 * padding.1;
        // Guard against division by zero / usize underflow in the output-size math.
        if stride.0 == 0 || stride.1 == 0 {
            return Err(TorshError::InvalidArgument(
                "stride components must be greater than 0".to_string(),
            ));
        }
        if padded_height < effective_kernel_h || padded_width < effective_kernel_w {
            return Err(TorshError::InvalidArgument(format!(
                "Effective kernel ({}, {}) exceeds padded input ({}, {})",
                effective_kernel_h, effective_kernel_w, padded_height, padded_width
            )));
        }
        let output_height = (padded_height - effective_kernel_h) / stride.0 + 1;
        let output_width = (padded_width - effective_kernel_w) / stride.1 + 1;
let mut output_data = vec![
<T as TensorElement>::zero();
batch_size * in_channels * output_height * output_width
];
        // Copy both tensors into contiguous buffers once so the hot loop can
        // index them directly, as conv2d does.
        let self_data = self.to_vec()?;
        let weight_data = weight.to_vec()?;
for n in 0..batch_size {
for c in 0..in_channels {
for oh in 0..output_height {
for ow in 0..output_width {
let mut sum = <T as TensorElement>::zero();
for kh in 0..kernel_height {
for kw in 0..kernel_width {
let ih =
(oh * stride.0 + kh * dilation.0) as i32 - padding.0 as i32;
let iw =
(ow * stride.1 + kw * dilation.1) as i32 - padding.1 as i32;
if ih >= 0
&& (ih as usize) < input_height
&& iw >= 0
&& (iw as usize) < input_width
{
let input_idx = n * in_channels * input_height * input_width
+ c * input_height * input_width
+ ih as usize * input_width
+ iw as usize;
let weight_idx =
c * kernel_height * kernel_width + kh * kernel_width + kw;
                                    let input_val = self_data[input_idx];
                                    let weight_val = weight_data[weight_idx];
                                    sum = sum + input_val * weight_val;
}
}
}
let output_idx = n * in_channels * output_height * output_width
+ c * output_height * output_width
+ oh * output_width
+ ow;
output_data[output_idx] = sum;
}
}
}
}
let mut output = Tensor::from_data(
output_data,
vec![batch_size, in_channels, output_height, output_width],
self.device(),
)?;
if let Some(b) = bias {
if b.shape().dims() != [in_channels] {
return Err(TorshError::InvalidArgument(format!(
"Bias must have shape [{}], got {:?}",
in_channels,
b.shape().dims()
)));
}
let bias_data = b.to_vec()?;
let mut output_data = output.to_vec()?;
for n in 0..batch_size {
#[allow(clippy::needless_range_loop)]
for c in 0..in_channels {
for oh in 0..output_height {
for ow in 0..output_width {
let idx = n * in_channels * output_height * output_width
+ c * output_height * output_width
+ oh * output_width
+ ow;
output_data[idx] = output_data[idx] + bias_data[c];
}
}
}
}
output = Tensor::from_data(
output_data,
vec![batch_size, in_channels, output_height, output_width],
self.device(),
)?;
}
        if self.requires_grad
            || weight.requires_grad
            || bias.map_or(false, |b| b.requires_grad)
        {
use std::sync::Arc;
output.requires_grad = true;
output.operation = crate::Operation::Custom(
"depthwise_conv2d".to_string(),
vec![
Arc::downgrade(&Arc::new(self.clone())),
Arc::downgrade(&Arc::new(weight.clone())),
],
);
}
Ok(output)
}
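    /// Depthwise-separable 2-D convolution: a depthwise pass followed by a
    /// 1x1 pointwise `conv2d` that mixes channels. `depthwise_weight` is
    /// `(in_channels, 1, kernel_h, kernel_w)`, `pointwise_weight` is
    /// `(out_channels, in_channels, 1, 1)`, and `bias` is applied by the
    /// pointwise stage.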
pub fn separable_conv2d(
&self,
depthwise_weight: &Self,
pointwise_weight: &Self,
bias: Option<&Self>,
stride: (usize, usize),
padding: (usize, usize),
dilation: (usize, usize),
) -> Result<Self> {
        // Depthwise stage filters each channel independently; the bias is
        // deferred to the pointwise stage.
        let depthwise_output =
            self.depthwise_conv2d(depthwise_weight, None, stride, padding, dilation)?;
        // Pointwise stage: a 1x1 convolution that mixes channels.
        let output =
            depthwise_output.conv2d(pointwise_weight, bias, (1, 1), (0, 0), (1, 1), 1)?;
        if self.requires_grad
            || depthwise_weight.requires_grad
            || pointwise_weight.requires_grad
            || bias.map_or(false, |b| b.requires_grad)
        {
use std::sync::Arc;
let mut tracked_output = output;
tracked_output.requires_grad = true;
tracked_output.operation = crate::Operation::Custom(
"separable_conv2d".to_string(),
vec![
Arc::downgrade(&Arc::new(self.clone())),
Arc::downgrade(&Arc::new(depthwise_weight.clone())),
Arc::downgrade(&Arc::new(pointwise_weight.clone())),
],
);
Ok(tracked_output)
} else {
Ok(output)
}
}
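    /// Transposed 2-D convolution. `weight` has shape
    /// `(in_channels, out_channels / groups, kernel_h, kernel_w)`, and each
    /// spatial output size is
    /// `(size - 1) * stride - 2 * padding + dilation * (kernel - 1) + 1 + output_padding`.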
#[allow(clippy::too_many_arguments)]
pub fn conv_transpose2d(
&self,
weight: &Self,
bias: Option<&Self>,
stride: (usize, usize),
padding: (usize, usize),
output_padding: (usize, usize),
dilation: (usize, usize),
groups: usize,
) -> Result<Self> {
let input_shape_obj = self.shape();
let input_shape = input_shape_obj.dims();
let weight_shape_obj = weight.shape();
let weight_shape = weight_shape_obj.dims();
if input_shape.len() != 4 {
return Err(TorshError::InvalidArgument(format!(
"Expected 4D input tensor for conv_transpose2d, got {}D",
input_shape.len()
)));
}
if weight_shape.len() != 4 {
return Err(TorshError::InvalidArgument(format!(
"Expected 4D weight tensor for conv_transpose2d, got {}D",
weight_shape.len()
)));
}
let batch_size = input_shape[0];
let in_channels = input_shape[1];
let input_height = input_shape[2];
let input_width = input_shape[3];
let out_channels = weight_shape[1] * groups;
let kernel_height = weight_shape[2];
let kernel_width = weight_shape[3];
if in_channels % groups != 0 || out_channels % groups != 0 {
return Err(TorshError::InvalidArgument(
"in_channels and out_channels must be divisible by groups".to_string(),
));
}
if weight_shape[0] != in_channels {
return Err(TorshError::InvalidArgument(format!(
"Weight tensor has wrong number of input channels: expected {}, got {}",
in_channels, weight_shape[0]
)));
}
        let effective_kernel_h = (kernel_height - 1) * dilation.0 + 1;
        let effective_kernel_w = (kernel_width - 1) * dilation.1 + 1;
        // Sum the positive terms first, then subtract the padding with a
        // checked subtraction, so oversized padding reports an error instead
        // of panicking on usize underflow part-way through the expression.
        let output_height = ((input_height - 1) * stride.0 + effective_kernel_h + output_padding.0)
            .checked_sub(2 * padding.0)
            .ok_or_else(|| {
                TorshError::InvalidArgument(
                    "padding too large for conv_transpose2d output height".to_string(),
                )
            })?;
        let output_width = ((input_width - 1) * stride.1 + effective_kernel_w + output_padding.1)
            .checked_sub(2 * padding.1)
            .ok_or_else(|| {
                TorshError::InvalidArgument(
                    "padding too large for conv_transpose2d output width".to_string(),
                )
            })?;
let mut output_data = vec![
<T as TensorElement>::zero();
batch_size * out_channels * output_height * output_width
];
let self_data = self.to_vec()?;
let weight_data = weight.to_vec()?;
for n in 0..batch_size {
for g in 0..groups {
let in_ch_start = g * (in_channels / groups);
let in_ch_end = (g + 1) * (in_channels / groups);
let out_ch_start = g * (out_channels / groups);
let out_ch_end = (g + 1) * (out_channels / groups);
for ic in in_ch_start..in_ch_end {
for ih in 0..input_height {
for iw in 0..input_width {
let input_val = self_data[n * in_channels * input_height * input_width
+ ic * input_height * input_width
+ ih * input_width
+ iw];
for oc in out_ch_start..out_ch_end {
let oc_rel = oc - out_ch_start;
for kh in 0..kernel_height {
for kw in 0..kernel_width {
let oh = ih * stride.0 + kh * dilation.0;
let ow = iw * stride.1 + kw * dilation.1;
if oh >= padding.0 && ow >= padding.1 {
let oh_final = oh - padding.0;
let ow_final = ow - padding.1;
if oh_final < output_height && ow_final < output_width {
let weight_idx = ic
* (out_channels / groups)
* kernel_height
* kernel_width
+ oc_rel * kernel_height * kernel_width
+ kh * kernel_width
+ kw;
let output_idx =
n * out_channels * output_height * output_width
+ oc * output_height * output_width
+ oh_final * output_width
+ ow_final;
output_data[output_idx] = output_data[output_idx]
+ input_val * weight_data[weight_idx];
}
}
}
}
}
}
}
}
}
}
let mut output = Tensor::from_data(
output_data,
vec![batch_size, out_channels, output_height, output_width],
self.device(),
)?;
if let Some(b) = bias {
if b.shape().dims() != [out_channels] {
return Err(TorshError::InvalidArgument(format!(
"Bias must have shape [{}], got {:?}",
out_channels,
b.shape().dims()
)));
}
let bias_data = b.to_vec()?;
let mut output_data = output.to_vec()?;
for n in 0..batch_size {
#[allow(clippy::needless_range_loop)]
for oc in 0..out_channels {
for oh in 0..output_height {
for ow in 0..output_width {
let idx = n * out_channels * output_height * output_width
+ oc * output_height * output_width
+ oh * output_width
+ ow;
output_data[idx] = output_data[idx] + bias_data[oc];
}
}
}
}
output = Tensor::from_data(
output_data,
vec![batch_size, out_channels, output_height, output_width],
self.device(),
)?;
}
        if self.requires_grad
            || weight.requires_grad
            || bias.map_or(false, |b| b.requires_grad)
        {
use std::sync::Arc;
output.requires_grad = true;
output.operation = crate::Operation::Custom(
"conv_transpose2d".to_string(),
vec![
Arc::downgrade(&Arc::new(self.clone())),
Arc::downgrade(&Arc::new(weight.clone())),
],
);
}
Ok(output)
}
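    /// 1-D cross-correlation of two 1-D tensors of lengths `n` and `m`.
    ///
    /// `Full` returns all `n + m - 1` lags, `Valid` the `n - m + 1` lags with
    /// complete overlap, and `Same` a length-`n` output roughly centred on
    /// zero lag.
    ///
    /// A minimal sketch, assuming `a` and `b` are 1-D tensors of lengths 5 and 3:
    ///
    /// ```ignore
    /// let full = a.xcorr1d(&b, CorrelationMode::Full)?;   // length 7
    /// let valid = a.xcorr1d(&b, CorrelationMode::Valid)?; // length 3
    /// ```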
#[allow(clippy::needless_range_loop)]
pub fn xcorr1d(&self, other: &Self, mode: CorrelationMode) -> Result<Self> {
let self_shape_ref = self.shape();
let other_shape_ref = other.shape();
let self_shape = self_shape_ref.dims();
let other_shape = other_shape_ref.dims();
if self_shape.len() != 1 || other_shape.len() != 1 {
return Err(TorshError::InvalidArgument(
"xcorr1d requires 1D tensors".to_string(),
));
}
        let n = self_shape[0];
        let m = other_shape[0];
        // Empty inputs would underflow the output-size arithmetic below.
        if n == 0 || m == 0 {
            return Err(TorshError::InvalidArgument(
                "xcorr1d requires non-empty tensors".to_string(),
            ));
        }
let (output_size, lag_start) = match mode {
CorrelationMode::Full => (n + m - 1, -(m as i32 - 1)),
            CorrelationMode::Valid => {
                if n < m {
                    return Err(TorshError::InvalidArgument(
                        "Valid mode requires the first tensor to be at least as long as the second"
                            .to_string(),
                    ));
                }
                (n - m + 1, 0)
            }
CorrelationMode::Same => (n, -((m as i32 - 1) / 2)),
};
let mut output_data = vec![<T as TensorElement>::zero(); output_size];
let self_data = self.to_vec()?;
let other_data = other.to_vec()?;
for i in 0..output_size {
let mut sum = <T as TensorElement>::zero();
let lag = lag_start + i as i32;
for j in 0..n {
let other_idx = j as i32 - lag;
if other_idx >= 0 && (other_idx as usize) < m {
sum = sum + self_data[j] * other_data[other_idx as usize];
}
}
output_data[i] = sum;
}
let output = Tensor::from_data(output_data, vec![output_size], self.device())?;
Ok(output)
}
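    /// Autocorrelation of a 1-D tensor at lags `0..=max_lag` (default `n - 1`),
    /// computed as the raw, unnormalised sum `sum_i x[i] * x[i - lag]`.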
pub fn autocorr1d(&self, max_lag: Option<usize>) -> Result<Self> {
let shape_ref = self.shape();
let shape = shape_ref.dims();
if shape.len() != 1 {
return Err(TorshError::InvalidArgument(
"autocorr1d requires 1D tensor".to_string(),
));
}
        let n = shape[0];
        // An empty input would underflow the default `n - 1` max lag.
        if n == 0 {
            return Err(TorshError::InvalidArgument(
                "autocorr1d requires a non-empty tensor".to_string(),
            ));
        }
        let max_lag = max_lag.unwrap_or(n - 1).min(n - 1);
let self_data = self.to_vec()?;
let mut output_data = Vec::with_capacity(max_lag + 1);
for lag in 0..=max_lag {
let mut sum = <T as TensorElement>::zero();
for i in lag..n {
sum = sum + self_data[i] * self_data[i - lag];
}
output_data.push(sum);
}
let output = Tensor::from_data(output_data, vec![max_lag + 1], self.device())?;
Ok(output)
}
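    /// 2-D cross-correlation of two 2-D tensors; see [`CorrelationMode`] for
    /// how the output size is chosen. `Valid` mode requires the first tensor
    /// to be at least as large as the second in both dimensions.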
pub fn xcorr2d(&self, other: &Self, mode: CorrelationMode) -> Result<Self> {
let self_shape_ref = self.shape();
let other_shape_ref = other.shape();
let self_shape = self_shape_ref.dims();
let other_shape = other_shape_ref.dims();
if self_shape.len() != 2 || other_shape.len() != 2 {
return Err(TorshError::InvalidArgument(
"xcorr2d requires 2D tensors".to_string(),
));
}
        let (h1, w1) = (self_shape[0], self_shape[1]);
        let (h2, w2) = (other_shape[0], other_shape[1]);
        // Empty inputs would underflow the output-size arithmetic below.
        if h1 == 0 || w1 == 0 || h2 == 0 || w2 == 0 {
            return Err(TorshError::InvalidArgument(
                "xcorr2d requires non-empty tensors".to_string(),
            ));
        }
let (out_h, out_w, start_h, start_w) = match mode {
CorrelationMode::Full => (h1 + h2 - 1, w1 + w2 - 1, 0, 0),
CorrelationMode::Valid => {
if h1 < h2 || w1 < w2 {
return Err(TorshError::InvalidArgument(
"Valid mode requires first tensor to be larger than or equal to second"
.to_string(),
));
}
(h1 - h2 + 1, w1 - w2 + 1, h2 - 1, w2 - 1)
}
CorrelationMode::Same => (h1, w1, (h2 - 1) / 2, (w2 - 1) / 2),
};
let mut output_data = vec![<T as TensorElement>::zero(); out_h * out_w];
let self_data = self.to_vec()?;
let other_data = other.to_vec()?;
for i in 0..out_h {
for j in 0..out_w {
let mut sum = <T as TensorElement>::zero();
let actual_i = i + start_h;
let actual_j = j + start_w;
for ki in 0..h2 {
for kj in 0..w2 {
let src_i = actual_i as i32 - ki as i32;
let src_j = actual_j as i32 - kj as i32;
if src_i >= 0
&& (src_i as usize) < h1
&& src_j >= 0
&& (src_j as usize) < w1
{
let self_idx = src_i as usize * w1 + src_j as usize;
let other_idx = ki * w2 + kj;
sum = sum + self_data[self_idx] * other_data[other_idx];
}
}
}
output_data[i * out_w + j] = sum;
}
}
let output = Tensor::from_data(output_data, vec![out_h, out_w], self.device())?;
Ok(output)
}
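    /// Median filter over a 1-D tensor with an odd `window_size`; borders are
    /// handled by clamping indices to the signal ends (replicate padding).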
pub fn median_filter1d(&self, window_size: usize) -> Result<Self> {
let shape_ref = self.shape();
let shape = shape_ref.dims();
if shape.len() != 1 {
return Err(TorshError::InvalidArgument(
"median_filter1d requires 1D tensor".to_string(),
));
}
if window_size == 0 || window_size % 2 == 0 {
return Err(TorshError::InvalidArgument(
"Window size must be odd and greater than 0".to_string(),
));
}
let n = shape[0];
let half_window = window_size / 2;
let mut output_data = Vec::with_capacity(n);
let self_data = self.to_vec()?;
for i in 0..n {
let mut window_values = Vec::new();
for j in 0..window_size {
let idx = i as i32 + j as i32 - half_window as i32;
let actual_idx = if idx < 0 {
0
} else if idx >= n as i32 {
n - 1
} else {
idx as usize
};
window_values.push(self_data[actual_idx]);
}
window_values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
output_data.push(window_values[half_window]);
}
let output = Tensor::from_data(output_data, vec![n], self.device())?;
Ok(output)
}
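    /// Median filter over a 2-D tensor with odd `(window_h, window_w)`
    /// dimensions, using replicate padding at the borders.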
pub fn median_filter2d(&self, window_size: (usize, usize)) -> Result<Self> {
let shape_ref = self.shape();
let shape = shape_ref.dims();
if shape.len() != 2 {
return Err(TorshError::InvalidArgument(
"median_filter2d requires 2D tensor".to_string(),
));
}
let (window_h, window_w) = window_size;
if window_h == 0 || window_w == 0 || window_h % 2 == 0 || window_w % 2 == 0 {
return Err(TorshError::InvalidArgument(
"Window dimensions must be odd and greater than 0".to_string(),
));
}
let (h, w) = (shape[0], shape[1]);
let half_h = window_h / 2;
let half_w = window_w / 2;
let mut output_data = Vec::with_capacity(h * w);
let self_data = self.to_vec()?;
for i in 0..h {
for j in 0..w {
let mut window_values = Vec::new();
for di in 0..window_h {
for dj in 0..window_w {
let row = i as i32 + di as i32 - half_h as i32;
let col = j as i32 + dj as i32 - half_w as i32;
let actual_row = row.max(0).min(h as i32 - 1) as usize;
let actual_col = col.max(0).min(w as i32 - 1) as usize;
window_values.push(self_data[actual_row * w + actual_col]);
}
}
window_values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
output_data.push(window_values[window_values.len() / 2]);
}
}
let output = Tensor::from_data(output_data, vec![h, w], self.device())?;
Ok(output)
}
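    /// Gaussian smoothing of a 1-D tensor. The kernel defaults to about
    /// `6 * sigma` taps (at least 3, forced odd), is normalised to sum to one,
    /// and is applied with `xcorr1d` in `Same` mode.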
pub fn gaussian_filter1d(&self, sigma: f32, kernel_size: Option<usize>) -> Result<Self> {
let tensor_shape = self.shape();
let shape = tensor_shape.dims();
if shape.len() != 1 {
return Err(TorshError::InvalidArgument(
"gaussian_filter1d requires 1D tensor".to_string(),
));
}
if sigma <= 0.0 {
return Err(TorshError::InvalidArgument(
"Sigma must be positive".to_string(),
));
}
let kernel_size = kernel_size.unwrap_or(((6.0 * sigma) as usize).max(3));
let kernel_size = if kernel_size % 2 == 0 {
kernel_size + 1
} else {
kernel_size
};
let half_size = kernel_size / 2;
let mut kernel = Vec::with_capacity(kernel_size);
let mut sum = 0.0f32;
for i in 0..kernel_size {
let x = i as f32 - half_size as f32;
let value = (-0.5 * (x / sigma).powi(2)).exp();
kernel.push(value);
sum += value;
}
for value in &mut kernel {
*value /= sum;
}
let kernel_data: Vec<T> = kernel
.into_iter()
.map(|v| {
T::from(v as f64)
.unwrap_or_else(|| T::from(0.0).expect("numeric conversion should succeed"))
})
.collect();
let kernel_tensor = Tensor::from_data(kernel_data, vec![kernel_size], self.device())?;
self.xcorr1d(&kernel_tensor, CorrelationMode::Same)
}
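    /// Gaussian smoothing of a 2-D tensor with `sigma = (sigma_x, sigma_y)`,
    /// where `x` runs along the width. The kernel is built as a full 2-D
    /// window (not split into two 1-D passes) and applied with `xcorr2d` in
    /// `Same` mode.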
pub fn gaussian_filter2d(
&self,
sigma: (f32, f32),
kernel_size: Option<(usize, usize)>,
) -> Result<Self> {
let tensor_shape = self.shape();
let shape = tensor_shape.dims();
if shape.len() != 2 {
return Err(TorshError::InvalidArgument(
"gaussian_filter2d requires 2D tensor".to_string(),
));
}
let (sigma_x, sigma_y) = sigma;
if sigma_x <= 0.0 || sigma_y <= 0.0 {
return Err(TorshError::InvalidArgument(
"Sigma values must be positive".to_string(),
));
}
let (kernel_h, kernel_w) = kernel_size.unwrap_or((
((6.0 * sigma_y) as usize).max(3),
((6.0 * sigma_x) as usize).max(3),
));
let kernel_h = if kernel_h % 2 == 0 {
kernel_h + 1
} else {
kernel_h
};
let kernel_w = if kernel_w % 2 == 0 {
kernel_w + 1
} else {
kernel_w
};
let half_h = kernel_h / 2;
let half_w = kernel_w / 2;
let mut kernel = Vec::with_capacity(kernel_h * kernel_w);
let mut sum = 0.0f32;
for i in 0..kernel_h {
for j in 0..kernel_w {
let y = i as f32 - half_h as f32;
let x = j as f32 - half_w as f32;
let value = (-0.5 * ((x / sigma_x).powi(2) + (y / sigma_y).powi(2))).exp();
kernel.push(value);
sum += value;
}
}
for value in &mut kernel {
*value /= sum;
}
let kernel_data: Vec<T> = kernel
.into_iter()
.map(|v| {
T::from(v as f64)
.unwrap_or_else(|| T::from(0.0).expect("numeric conversion should succeed"))
})
.collect();
let kernel_tensor =
Tensor::from_data(kernel_data, vec![kernel_h, kernel_w], self.device())?;
self.xcorr2d(&kernel_tensor, CorrelationMode::Same)
}
}
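/// Boundary handling for the cross-correlation routines: `Full` keeps every
/// overlap position, `Valid` only the positions where the second input fits
/// entirely inside the first, and `Same` an output matching the first
/// input's size.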
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum CorrelationMode {
Full,
Valid,
Same,
}