Skip to main content

Tensor

Struct Tensor 

Source
pub struct Tensor<T = f32>
where T: TensorElement,
{ /* private fields */ }
Expand description

The main Tensor type for ToRSh

A tensor implementation with automatic memory mapping for large tensors and efficient views with reference counting

Implementations§

Source§

impl<T> Tensor<T>
where T: FloatElement + Copy,

Source

pub fn scalar(value: T) -> Result<Tensor<T>, TorshError>

Create a 0-dimensional tensor (scalar) from a single value

Source

pub fn as_ndarray( &self, ) -> Result<ArrayBase<OwnedRepr<T>, Dim<IxDynImpl>>, TorshError>

Convert tensor to ndarray (temporary placeholder implementation)

TODO: Implement proper ndarray conversion following SciRS2 POLICY This should use scirs2_core::ndarray for array operations

Source

pub fn from_ndarray( array: ArrayBase<OwnedRepr<T>, Dim<IxDynImpl>>, device: DeviceType, ) -> Result<Tensor<T>, TorshError>

Create tensor from ndarray (temporary placeholder implementation)

TODO: Implement proper ndarray conversion following SciRS2 POLICY This should use scirs2_core::ndarray for array operations

Source

pub fn max( &self, dim: Option<usize>, keepdim: bool, ) -> Result<Tensor<T>, TorshError>

Maximum element in tensor

Source

pub fn max_dim(&self, dim: i32, keepdim: bool) -> Result<Tensor<T>, TorshError>

Maximum along specified dimension

Source

pub fn min_dim(&self, dim: i32, keepdim: bool) -> Result<Tensor<T>, TorshError>

Minimum along specified dimension

Source§

impl<T> Tensor<T>

Boolean reduction operations for tensors

Source

pub fn all(&self) -> Result<Tensor<bool>, TorshError>

Check if all elements are non-zero (true)

Source

pub fn any(&self) -> Result<Tensor<bool>, TorshError>

Check if any element is non-zero (true)

Source

pub fn all_dim( &self, dim: i32, _keepdim: bool, ) -> Result<Tensor<bool>, TorshError>

Check if all elements along dimension are non-zero (true)

Source

pub fn any_dim( &self, dim: i32, _keepdim: bool, ) -> Result<Tensor<bool>, TorshError>

Check if any element along dimension is non-zero (true)

Source§

impl<T> Tensor<T>
where T: TensorElement + Copy,

Source

pub fn sum(&self) -> Result<Tensor<T>, TorshError>
where T: Add<Output = T> + Zero,

Compute sum of all elements

Source

pub fn sum_dim( &self, dims: &[i32], keepdim: bool, ) -> Result<Tensor<T>, TorshError>
where T: Add<Output = T> + Zero,

Compute sum along specified dimensions

Source

pub fn mean( &self, dims: Option<&[usize]>, keepdim: bool, ) -> Result<Tensor<T>, TorshError>
where T: Add<Output = T> + Div<Output = T> + Zero + One + FromPrimitive,

Compute mean along specified dimensions

Source

pub fn cumprod(&self, dim: i32) -> Result<Tensor<T>, TorshError>
where T: Mul<Output = T> + One + Copy,

Compute cumulative product along specified dimension

Source

pub fn matmul(&self, other: &Tensor<T>) -> Result<Tensor<T>, TorshError>
where T: Float + Sum,

Matrix multiplication

Source

pub fn sort( &self, _dim: Option<i32>, _descending: bool, ) -> Result<(Tensor<T>, Tensor<T>), TorshError>

Sort tensor along specified dimension

Source

pub fn min(&self) -> Result<Tensor<T>, TorshError>
where T: PartialOrd + Copy,

Min reduction method without trait bounds (for Iterator compatibility)

Source

pub fn t(&self) -> Result<Tensor<T>, TorshError>
where T: Copy + Zero,

Transpose operation (2D tensor)

Source

pub fn shares_storage(&self, other: &Tensor<T>) -> bool

Check if two tensors share the same underlying storage

Source

pub fn data(&self) -> Result<Vec<T>, TorshError>
where T: Copy,

Get data as a vector (backward compatibility method)

Source

pub fn data_mut_apply<F>(&mut self, func: F) -> Result<(), TorshError>
where F: FnMut(&mut T), T: Copy,

Apply a function to all elements in-place using direct storage access

Source

pub fn clone_data(&self) -> Tensor<T>
where T: Copy,

Clone the tensor with independent data (deep copy)

Source

pub fn make_unique(&mut self) -> Result<(), TorshError>

Ensure tensor has unique data (copy-on-write semantics)

Source

pub fn apply_<F>(&mut self, func: F) -> Result<(), TorshError>
where F: Fn(T) -> T, T: Copy,

Apply function in-place

Source

pub fn map<F>(&self, func: F) -> Result<Tensor<T>, TorshError>
where F: Fn(T) -> T, T: Copy,

Apply function element-wise to create new tensor

Source

pub fn item(&self) -> Result<T, TorshError>
where T: Copy,

Extract a scalar value from a single-element tensor

Source

pub fn cat(tensors: &[&Tensor<T>], dim: i32) -> Result<Tensor<T>, TorshError>
where T: Copy,

Concatenate tensors along a dimension

Source§

impl<T> Tensor<T>
where T: TensorElement + Copy + Float,

Source

pub fn norm(&self) -> Result<Tensor<T>, TorshError>

Compute the L2 norm of the tensor

Source§

impl<T> Tensor<T>
where T: TensorElement + Copy,

Source

pub fn matmul_scirs2(&self, other: &Tensor<T>) -> Result<Tensor<T>, TorshError>
where T: Float + Zero + One + Sum,

Use SciRS2 backend for optimized matrix multiplication

Source

pub fn sum_scirs2(&self) -> Result<Tensor<T>, TorshError>
where T: Add<Output = T> + Zero,

Use SciRS2 backend for optimized sum reduction

Source

pub fn mean_scirs2(&self) -> Result<Tensor<T>, TorshError>
where T: Add<Output = T> + Div<Output = T> + Zero + From<usize> + FromPrimitive,

Use SciRS2 backend for optimized mean reduction

Source

pub fn relu_scirs2(&self) -> Result<Tensor<T>, TorshError>
where T: PartialOrd + Zero,

Use SciRS2 backend for optimized ReLU activation

Source

pub fn sigmoid_scirs2(&self) -> Result<Tensor<T>, TorshError>
where T: Float,

Use SciRS2 backend for optimized sigmoid activation

Source

pub fn tanh_scirs2(&self) -> Result<Tensor<T>, TorshError>
where T: Float,

Use SciRS2 backend for optimized tanh activation

Source

pub fn softmax(&self, dim: i32) -> Result<Tensor<T>, TorshError>
where T: FloatElement + Copy + Sub<Output = T> + Div<Output = T>,

Softmax activation along specified dimension Computes softmax(x_i) = exp(x_i) / sum(exp(x_j)) for all j

Source

pub fn log_softmax(&self, dim: i32) -> Result<Tensor<T>, TorshError>
where T: FloatElement + Copy + Sub<Output = T>,

Log softmax activation along specified dimension Computes log_softmax(x_i) = log(softmax(x_i))

Source

pub fn cumsum(&self, dim: i32) -> Result<Tensor<T>, TorshError>
where T: Add<Output = T> + Zero + Copy,

Computes cumulative sum along a dimension

Source

pub fn argmin(&self, dim: Option<i32>) -> Result<Tensor<i64>, TorshError>
where T: PartialOrd + Copy,

Find the indices of minimum values along a dimension

Source

pub fn argmax(&self, dim: Option<i32>) -> Result<Tensor<i64>, TorshError>
where T: PartialOrd + Copy,

Find the indices of maximum values along a dimension

Source

pub fn topk( &self, k: usize, dim: Option<i32>, largest: bool, sorted: bool, ) -> Result<(Tensor<T>, Tensor<i64>), TorshError>
where T: PartialOrd + Copy + Zero,

Returns the k largest (or, when largest is false, the k smallest) elements along a dimension, together with their indices

Source§

impl<T> Tensor<T>
where T: ComplexElement + Copy,

Source

pub fn complex_conj(&self) -> Result<Tensor<T>, TorshError>
where T: Copy,

Complex conjugate for complex tensors

Source

pub fn real(&self) -> Result<Tensor<<T as ComplexElement>::Real>, TorshError>

Get real part of complex tensor

Source

pub fn imag(&self) -> Result<Tensor<<T as ComplexElement>::Real>, TorshError>

Get imaginary part of complex tensor

Source

pub fn abs(&self) -> Result<Tensor<<T as ComplexElement>::Real>, TorshError>

Get magnitude (absolute value) of complex tensor

Source

pub fn angle(&self) -> Result<Tensor<<T as ComplexElement>::Real>, TorshError>

Get phase (argument) of complex tensor

Source

pub fn complex( real: &Tensor<<T as ComplexElement>::Real>, imag: &Tensor<<T as ComplexElement>::Real>, ) -> Result<Tensor<T>, TorshError>

Create complex tensor from real and imaginary parts

Source

pub fn polar( magnitude: &Tensor<<T as ComplexElement>::Real>, phase: &Tensor<<T as ComplexElement>::Real>, ) -> Result<Tensor<T>, TorshError>

Create complex tensor from polar representation (magnitude and phase)

Source

pub fn backward_complex(&self) -> Result<(), TorshError>
where T: Copy + Default + Add<Output = T> + Sub<Output = T> + Mul<Output = T> + Div<Output = T>,

Backward pass for complex tensors (compute gradients)

Complex autograd follows PyTorch’s approach where gradients are computed treating complex numbers as 2D vectors of real numbers.

Source

pub fn complex_mul(&self, other: &Tensor<T>) -> Result<Tensor<T>, TorshError>
where T: Mul<Output = T> + Add<Output = T> + Sub<Output = T>,

Element-wise complex multiplication with proper gradient tracking

Source

pub fn complex_add(&self, other: &Tensor<T>) -> Result<Tensor<T>, TorshError>
where T: Add<Output = T>,

Element-wise complex addition with proper gradient tracking

Source

pub fn is_real(&self) -> Result<bool, TorshError>
where <T as ComplexElement>::Real: PartialEq + Zero,

Check if all elements in the tensor are real (imaginary part is zero)

Source

pub fn is_complex(&self) -> Result<bool, TorshError>
where <T as ComplexElement>::Real: PartialEq + Zero,

Check if any elements in the tensor are complex (imaginary part is non-zero)

Source§

impl<T> Tensor<T>
where T: TensorElement + Copy,

Source

pub fn cleanup_operation_refs(&mut self)

Clean up dead weak references in custom operations to improve memory efficiency

Source

pub fn from_data( data: Vec<T>, shape: Vec<usize>, device: DeviceType, ) -> Result<Tensor<T>, TorshError>

Create from raw data

Source

pub fn from_data_fast( data: Vec<T>, shape: Vec<usize>, device: DeviceType, ) -> Tensor<T>

🚀 Phase 7: Create tensor with fast result storage (skips alignment copy)

For SIMD operation results, uses simple InMemory storage to avoid the ~10µs overhead of copying data to aligned memory.

§Performance
  • Skips AlignedVec copy (saves ~10µs for 50K elements)
  • Best for intermediate/result tensors
  • Input tensors should still use from_data for optimal SIMD input access
Source

pub fn from_data_with_storage( data: Vec<T>, shape: Vec<usize>, device: DeviceType, use_memory_mapping: bool, ) -> Result<Tensor<T>, TorshError>

Create from raw data with explicit storage type

Source

pub fn from_data_memory_mapped( data: Vec<T>, shape: Vec<usize>, device: DeviceType, file_path: PathBuf, ) -> Result<Tensor<T>, TorshError>

Create from raw data with specified memory-mapped file path

Source

pub fn zeros( shape: &[usize], device: DeviceType, ) -> Result<Tensor<T>, TorshError>

Create a tensor filled with zeros

Source

pub fn ones( shape: &[usize], device: DeviceType, ) -> Result<Tensor<T>, TorshError>

Create a tensor filled with ones

Source

pub fn shape(&self) -> Shape

Get the shape of the tensor

Source

pub fn ndim(&self) -> usize

Get the number of dimensions

Source

pub fn numel(&self) -> usize

Get the total number of elements

Source

pub fn dtype(&self) -> DType

Get the data type

Source

pub fn to_dtype(&self, dtype: DType) -> Result<Tensor<T>, TorshError>

Convert tensor to a different data type

Source

pub fn device(&self) -> DeviceType

Get the device

Source

pub fn get(&self, indices: &[usize]) -> Result<T, TorshError>
where T: Copy,

Get element at multi-dimensional index

Source

pub fn get_flat(&self, index: usize) -> Result<T, TorshError>
where T: Copy,

Get element at single flat index

Source

pub fn set(&self, indices: &[usize], value: T) -> Result<(), TorshError>
where T: Copy,

Set element at index (requires multi-dimensional indices for views)

Source

pub fn get_slice(&self, start: usize, len: usize) -> Result<Vec<T>, TorshError>
where T: Copy,

Get slice of elements

Source

pub fn set_slice(&self, start: usize, values: &[T]) -> Result<(), TorshError>
where T: Copy,

Set slice of elements

Source

pub fn to_vec(&self) -> Result<Vec<T>, TorshError>
where T: Copy,

Get all data as a vector (may be expensive for large memory-mapped tensors) For views, extracts only the data visible by this view

Source

pub fn storage_type(&self) -> &'static str

Get storage type information

Source

pub fn memory_usage(&self) -> usize

Get estimated memory usage in bytes

Source

pub fn is_memory_mapped(&self) -> bool

Check if tensor uses memory mapping

Source

pub fn is_view(&self) -> bool

Check if this tensor is a view of another tensor

Source

pub fn strides(&self) -> Vec<usize>

Get the strides for this tensor (either custom strides for views or default contiguous strides)

Source

pub fn with_data_slice<R, F>(&self, f: F) -> Result<R, TorshError>
where F: FnOnce(&[T]) -> Result<R, TorshError>, T: Copy,

Execute a function with zero-copy access to tensor data (immutable)

This enables SIMD operations without memory copies by providing direct access to the underlying buffer within a scoped context.

§Arguments
  • f - Closure that receives &[T] and returns Result<R>
§Returns

Result from the closure

§Performance
  • Zero memory copies for InMemory and Aligned storage
  • One allocation for MemoryMapped storage
  • Enables 2-4x SIMD speedup (per SciRS2 docs)
§Examples
// Direct SIMD operation without copies
let result = tensor.with_data_slice(|data| {
    other_tensor.with_data_slice(|other_data| {
        // Zero-copy SIMD addition
        f32::simd_add(&data, &other_data)
    })
})?;
Source

pub fn with_data_slice_mut<R, F>(&self, f: F) -> Result<R, TorshError>
where F: FnOnce(&mut [T]) -> Result<R, TorshError>, T: Copy,

Execute a function with zero-copy access to tensor data (mutable)

This enables in-place SIMD operations without memory copies.

§Arguments
  • f - Closure that receives &mut [T] and returns Result<R>
§Returns

Result from the closure

§Performance
  • Zero memory copies for InMemory storage
  • Not supported for MemoryMapped or Aligned storage (returns error)
§Examples
// In-place SIMD operation without copies
tensor.with_data_slice_mut(|data| {
    other_tensor.with_data_slice(|other_data| {
        // Zero-copy in-place SIMD addition
        for (x, y) in data.iter_mut().zip(other_data.iter()) {
            *x = *x + *y;
        }
        Ok(())
    })
})?;
Source

pub fn ones_like(&self) -> Result<Tensor<T>, TorshError>

Create a tensor of ones with the same shape as this tensor

Source

pub fn zeros_like(&self) -> Result<Tensor<T>, TorshError>

Create a tensor of zeros with the same shape as this tensor

Source

pub fn requires_grad_(self, requires_grad: bool) -> Tensor<T>

Enable or disable gradient tracking for this tensor.

This method marks the tensor as requiring (or not requiring) gradient computation during the backward pass. It consumes self and returns the modified tensor, allowing for method chaining in the builder pattern.

§Parameters
  • requires_grad: If true, gradients will be computed and stored for this tensor during backward passes. If false, gradients will not be computed.
§Important Notes
  • Only leaf tensors (tensors created directly, not from operations) store gradients
  • Intermediate tensors in the computation graph don’t store gradients, only leaf tensors do
  • Setting requires_grad=true enables tracking for all subsequent operations
  • You should typically set this for model parameters and input data you want to optimize
§Examples
§Basic Usage
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

// Create tensor and enable gradient tracking
let x = Tensor::from_data(vec![1.0, 2.0, 3.0], vec![3], DeviceType::Cpu)?
    .requires_grad_(true);

assert!(x.requires_grad());
§Neural Network Parameters
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

// Create trainable weights
let weights = Tensor::randn(&[784, 128], DeviceType::Cpu)?
    .requires_grad_(true);

let bias = Tensor::zeros(&[128], DeviceType::Cpu)?
    .requires_grad_(true);

// Use in forward pass
let input = Tensor::randn(&[32, 784], DeviceType::Cpu)?;
let output = input.matmul(&weights)?.add(&bias)?;
§Freezing Layers (Transfer Learning)
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

// Pretrained layer - don't compute gradients
let frozen_weights = Tensor::randn(&[512, 256], DeviceType::Cpu)?
    .requires_grad_(false);

// New layer - compute gradients
let trainable_weights = Tensor::randn(&[256, 10], DeviceType::Cpu)?
    .requires_grad_(true);
§Method Chaining
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

// Chain multiple operations
let x = Tensor::ones(&[5], DeviceType::Cpu)?
    .requires_grad_(true)
    .mul_scalar(2.0)?
    .add_scalar(1.0)?;

assert!(x.requires_grad());
§Gradient Propagation

When a tensor has requires_grad=true, all operations on it will also have requires_grad=true, building the computation graph:

use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

let x = Tensor::from_data(vec![2.0], vec![1], DeviceType::Cpu)?
    .requires_grad_(true);

let y = x.pow(2.0)?;        // y also requires grad
let z = y.mul_scalar(3.0)?;  // z also requires grad
let w = z.add_scalar(1.0)?;  // w also requires grad

assert!(y.requires_grad());
assert!(z.requires_grad());
assert!(w.requires_grad());
§See Also
Source

pub fn requires_grad(&self) -> bool

Check if this tensor requires gradient computation.

Returns true if gradients will be computed for this tensor during backward passes, false otherwise.

§Examples
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

let x = Tensor::ones(&[3], DeviceType::Cpu)?.requires_grad_(true);
assert!(x.requires_grad());

let y = Tensor::ones(&[3], DeviceType::Cpu)?;
assert!(!y.requires_grad());
§See Also
Source

pub fn set_grad(&self, grad: Option<Tensor<T>>)

Set gradient tensor

Source

pub fn grad_mut(&mut self) -> Option<&mut Tensor<T>>

Get mutable access to gradient

Source

pub fn to<D>(self, device: D) -> Result<Tensor<T>, TorshError>
where D: Into<DeviceType>,

🚀 Enhanced device transfer with multi-backend GPU support Automatically selects optimal transfer strategy and backend

Source

pub fn distribute_multi_gpu_wrapper( &self, gpu_count: usize, ) -> Result<Vec<Tensor<T>>, TorshError>

Source

pub fn detach(&self) -> Tensor<T>

Create a detached copy of this tensor that doesn’t track gradients.

This method creates a new tensor with the same data as the original, but with requires_grad=false. The detached tensor is not part of the computation graph and will not participate in gradient computation, even if the original tensor did.

§Use Cases
  • Inference with trained parameters: Use detached weights for forward-only computation
  • Custom gradient logic: Manually control which tensors participate in backprop
  • Debugging: Inspect intermediate values without affecting the gradient flow
  • Mixed training: Some computations need gradients, others don’t
  • Memory optimization: Reduce memory usage by not tracking gradients for certain operations
§Important Notes
  • The returned tensor shares no gradient history with the original
  • Operations on the detached tensor will also have requires_grad=false
  • This creates a copy of the data (not a view) - consider memory implications
  • The detached tensor is a “leaf” tensor with no computation history
§Examples
§Basic Detachment
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

let x = Tensor::from_data(vec![1.0, 2.0, 3.0], vec![3], DeviceType::Cpu)?
    .requires_grad_(true);

// Create detached copy
let x_detached = x.detach();

// Original tracks gradients, detached does not
assert!(x.requires_grad());
assert!(!x_detached.requires_grad());

// Operations on detached tensor don't track gradients
let y_detached = x_detached.pow(2.0)?;
assert!(!y_detached.requires_grad());
§Stopping Gradient Flow
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

// This pattern prevents gradients from flowing through certain operations
let x = Tensor::from_data(vec![2.0], vec![1], DeviceType::Cpu)?
    .requires_grad_(true);

// Compute something we want to use but not backpropagate through
let intermediate = x.pow(2.0)?;
let intermediate_detached = intermediate.detach();

// Use detached version in further computation
let y = intermediate_detached.mul_scalar(3.0)?;
let loss = y.sum()?;

// Backward won't compute gradients for x
// (because gradient flow is stopped at detach point)
§Target Tensor Pattern (No Gradients for Labels)
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

// Predictions need gradients
let predictions = Tensor::randn(&[32, 10], DeviceType::Cpu)?
    .requires_grad_(true);

// Target labels should NOT have gradients
let targets = Tensor::from_data(
    vec![0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    vec![1, 10],
    DeviceType::Cpu
)?.detach();  // Explicitly detach to ensure no gradients

assert!(predictions.requires_grad());
assert!(!targets.requires_grad());

// Compute loss - only predictions will have gradients
let diff = predictions.sub(&targets)?;
let loss = diff.pow(2.0)?.mean()?;
loss.backward()?;
§Debugging Intermediate Values
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

let x = Tensor::randn(&[100], DeviceType::Cpu)?
    .requires_grad_(true);

// Complex computation
let y = x.pow(2.0)?.add_scalar(1.0)?;

// Detach to inspect values without affecting gradient computation
let y_values = y.detach();
let max_val = y_values.to_vec()?.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
println!("Max value: {}", max_val);

// Continue with original gradient-tracking tensor
let loss = y.sum()?;
loss.backward()?;
§Feature Extraction (Transfer Learning)
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

// Pretrained model features - don't need gradients
fn pretrained_feature_extractor(input: &Tensor<f32>) -> Result<Tensor<f32>, torsh_core::error::TorshError> {
    // Complex pretrained network...
    let features = input.matmul(&Tensor::randn(&[784, 512], DeviceType::Cpu)?)?;
    // Detach to stop gradient flow
    Ok(features.detach())
}

// New trainable classifier head
let input = Tensor::randn(&[32, 784], DeviceType::Cpu)?;
let features = pretrained_feature_extractor(&input)?;

// Only this part will have gradients
let weights = Tensor::randn(&[512, 10], DeviceType::Cpu)?
    .requires_grad_(true);
let output = features.matmul(&weights)?;
§Memory Optimization Example
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

let x = Tensor::randn(&[1000, 1000], DeviceType::Cpu)?
    .requires_grad_(true);

// Large intermediate computation
let intermediate = x.matmul(&x.transpose(&[1, 0])?)?;

// If we only need the values, not gradients
let result = intermediate.detach();

// Now intermediate computation graph can be freed
drop(intermediate);

// Use result without gradient tracking
let final_result = result.mul_scalar(0.5)?;
§Comparison with no_grad Context
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;
use torsh_autograd::guards::no_grad;

let x = Tensor::randn(&[5], DeviceType::Cpu)?
    .requires_grad_(true);

// Method 1: Detach - creates a copy
let y1 = x.detach().pow(2.0)?;
assert!(!y1.requires_grad());

// Method 2: no_grad context - affects all operations in scope
let y2 = {
    let _guard = no_grad();
    x.pow(2.0)?
};
assert!(!y2.requires_grad());

// Note: detach() is for selective detachment of specific tensors
// no_grad() is for disabling gradient tracking in a code block
§Detach vs Clone
  • detach(): Creates a copy with requires_grad=false
  • clone(): Creates a copy preserving requires_grad status
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

let x = Tensor::ones(&[3], DeviceType::Cpu)?
    .requires_grad_(true);

let x_clone = x.clone();
let x_detach = x.detach();

assert!(x_clone.requires_grad());   // Preserves gradient tracking
assert!(!x_detach.requires_grad()); // Disables gradient tracking
§See Also
  • requires_grad_() - Set gradient tracking
  • torsh_autograd::guards::no_grad() - Disable gradients in a scope
  • backward() - Compute gradients
Source

pub fn grad(&self) -> Option<Tensor<T>>

Access the computed gradient for this tensor.

Returns Some(gradient_tensor) if gradients have been computed via backward(), or None if no gradients exist yet.

§Important Notes
  • Only leaf tensors store gradients (tensors created directly, not from operations)
  • You must call backward() on a scalar output before gradients exist
  • Gradients accumulate across multiple backward() calls unless cleared with zero_grad()
  • The returned gradient tensor has the same shape as the original tensor
§Returns
  • Some(Tensor) - The gradient tensor if computed
  • None - If backward has not been called or gradients were cleared
§Examples
§Basic Gradient Access
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

let x = Tensor::from_data(vec![2.0], vec![1], DeviceType::Cpu)?
    .requires_grad_(true);

// Before backward: no gradient
assert!(x.grad().is_none());

// Compute gradient
let y = x.pow(2.0)?;
y.backward()?;

// After backward: gradient exists
let grad = x.grad().expect("gradient should exist");
assert_eq!(grad.item()?, 4.0);  // dy/dx = 2x = 4
§Multi-Dimensional Gradients
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

let x = Tensor::from_data(
    vec![1.0, 2.0, 3.0, 4.0],
    vec![2, 2],
    DeviceType::Cpu
)?.requires_grad_(true);

// Loss function: sum of squares
let y = x.pow(2.0)?;
let loss = y.sum()?;
loss.backward()?;

// Gradient has same shape as input
let grad = x.grad().expect("gradient should exist");
assert_eq!(grad.shape().dims(), &[2, 2]);
// Gradient values: [2, 4, 6, 8] (2 * each input)
§Using Gradients for Optimization
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

let mut weights = Tensor::randn(&[10], DeviceType::Cpu)?
    .requires_grad_(true);

let learning_rate = 0.01;

// Training loop
for step in 0..100 {
    // Forward pass
    let output = weights.mul_scalar(2.0)?;
    let loss = output.sum()?;

    // Backward pass
    loss.backward()?;

    // Get gradients and update weights
    if let Some(grad) = weights.grad() {
        // weights = weights - learning_rate * grad
        let update = grad.mul_scalar(learning_rate)?;
        weights = weights.sub(&update)?;
    }

    // Clear gradients for next iteration
    weights.zero_grad();
}
§Gradient Accumulation Check
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

let mut x = Tensor::from_data(vec![1.0], vec![1], DeviceType::Cpu)?
    .requires_grad_(true);

// First backward pass
let y1 = x.pow(2.0)?;
y1.backward()?;
let grad1 = x.grad().expect("gradient should exist").item()?;
assert_eq!(grad1, 2.0);

// Second backward pass without zeroing - accumulates!
let y2 = x.pow(2.0)?;
y2.backward()?;
let grad2 = x.grad().expect("gradient should exist").item()?;
assert_eq!(grad2, 4.0);  // 2.0 + 2.0 = 4.0

// Clear for next iteration
x.zero_grad();
assert!(x.grad().is_none());
§Common Patterns
§Safe Gradient Extraction
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

// Option 1: Unwrap (panics if no gradient)
let grad = x.grad().expect("gradient should exist");

// Option 2: Pattern matching (safer)
match x.grad() {
    Some(grad) => {
        // Use gradient
        println!("Gradient: {:?}", grad);
    }
    None => {
        println!("No gradient computed yet");
    }
}

// Option 3: if-let pattern
if let Some(grad) = x.grad() {
    // Use gradient
}
§See Also
Source

pub fn has_grad(&self) -> bool

Check if this tensor has a computed gradient.

Returns true if gradients have been computed and stored, false otherwise. This is equivalent to tensor.grad().is_some() but more explicit.

§Examples
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

let x = Tensor::from_data(vec![2.0], vec![1], DeviceType::Cpu)?
    .requires_grad_(true);

assert!(!x.has_grad());  // No gradient yet

let y = x.pow(2.0)?;
y.backward()?;

assert!(x.has_grad());   // Gradient now exists
§See Also
  • grad() - Access the gradient tensor
Source

pub fn zero_grad(&mut self)

Clear the gradient for this tensor.

This method sets the gradient to None, effectively resetting it. You should call this method between training iterations to prevent gradient accumulation when you don’t want it.

§When to Use
  • After optimizer step: Clear gradients before the next training iteration
  • Between validation runs: Ensure clean state for evaluation
  • After gradient accumulation: Clear after applying accumulated gradients
§Important Notes
  • Gradients accumulate by default across multiple backward() calls
  • Always call zero_grad() between training iterations unless you explicitly want gradient accumulation
  • This only clears the gradient; it doesn’t affect requires_grad status
§Examples
§Standard Training Loop
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

let mut weights = Tensor::randn(&[10], DeviceType::Cpu)?
    .requires_grad_(true);

for epoch in 0..100 {
    // Forward pass
    let output = weights.mul_scalar(2.0)?;
    let loss = output.sum()?;

    // Backward pass
    loss.backward()?;

    // Update weights (simplified)
    if let Some(grad) = weights.grad() {
        weights = weights.sub(&grad.mul_scalar(0.01)?)?;
    }

    // CRITICAL: Clear gradients for next iteration
    weights.zero_grad();
}
§Gradient Accumulation Pattern
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

let mut model_params = Tensor::randn(&[100], DeviceType::Cpu)?
    .requires_grad_(true);

let accumulation_steps = 4;

// Zero gradients at start
model_params.zero_grad();

for step in 0..accumulation_steps {
    // Forward and backward for mini-batch
    let output = model_params.mul_scalar(2.0)?;
    let loss = output.sum()?;
    loss.backward()?;

    // DON'T zero gradients here - let them accumulate
}

// After accumulation, update weights
if let Some(grad) = model_params.grad() {
    // Scale by number of accumulation steps
    let scaled_grad = grad.div_scalar(accumulation_steps as f32)?;
    model_params = model_params.sub(&scaled_grad.mul_scalar(0.01)?)?;
}

// NOW clear gradients for next accumulation cycle
model_params.zero_grad();
§Multiple Parameter Update
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

let mut weights = Tensor::randn(&[10, 5], DeviceType::Cpu)?
    .requires_grad_(true);
let mut bias = Tensor::zeros(&[5], DeviceType::Cpu)?
    .requires_grad_(true);

// Training step
let input = Tensor::randn(&[32, 10], DeviceType::Cpu)?;
let output = input.matmul(&weights)?.add(&bias)?;
let loss = output.sum()?;

loss.backward()?;

// Update both parameters
// ... (weight updates)

// Clear gradients for both
weights.zero_grad();
bias.zero_grad();
§Validation Without Gradient Tracking
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;
use torsh_autograd::guards::no_grad;

let weights = Tensor::randn(&[10], DeviceType::Cpu)?
    .requires_grad_(true);

// During validation, use no_grad guard
let validation_loss = {
    let _guard = no_grad();
    let output = weights.mul_scalar(2.0)?;
    output.sum()?
};

// No gradients computed during validation, so nothing to zero
// But good practice to ensure clean state
// weights.zero_grad();
§Common Mistake
§Forgetting to Zero Gradients
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

let mut x = Tensor::from_data(vec![1.0], vec![1], DeviceType::Cpu)?
    .requires_grad_(true);

// Iteration 1
let y1 = x.pow(2.0)?;
y1.backward()?;
let grad1 = x.grad().expect("gradient should exist").item()?;
println!("Grad 1: {}", grad1);  // 2.0

// Iteration 2 - FORGOT TO ZERO!
let y2 = x.pow(2.0)?;
y2.backward()?;
let grad2 = x.grad().expect("gradient should exist").item()?;
println!("Grad 2: {}", grad2);  // 4.0 (WRONG! Should be 2.0)

// Correct approach:
x.zero_grad();  // Clear between iterations
let y3 = x.pow(2.0)?;
y3.backward()?;
let grad3 = x.grad().expect("gradient should exist").item()?;
println!("Grad 3: {}", grad3);  // 2.0 (CORRECT!)
§See Also
Source

pub fn backward(&self) -> Result<(), TorshError>
where T: FloatElement + Copy + Default + Add<Output = T> + Sub<Output = T> + Mul<Output = T> + Div<Output = T> + Clone + Debug, f32: From<T>,

Computes gradients for all tensors in the computation graph.

This method performs backpropagation through the computation graph, computing gradients for all tensors with requires_grad=true. The backward pass starts from this tensor (which must be a scalar) and propagates gradients back through the computational graph to all leaf tensors.

§Requirements
  • This tensor must be a scalar (single element) - use .sum() or similar reduction operations to create a scalar from multi-dimensional tensors
  • This tensor must have requires_grad=true
  • All tensors in the computation graph are retained during backward pass
§How It Works

The backward pass:

  1. Starts with a gradient of 1.0 for the output (scalar) tensor
  2. Traverses the computation graph in reverse topological order
  3. Applies the chain rule at each operation node
  4. Accumulates gradients at leaf nodes (input tensors)
§Gradient Accumulation

If you call backward() multiple times without zeroing gradients, the gradients will accumulate (add together). This is useful for:

  • Gradient accumulation across mini-batches
  • Computing gradients for multiple outputs

Use zero_grad() to clear gradients between iterations.

§Examples
§Basic Gradient Computation
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

// Create input tensor with gradient tracking enabled
let x = Tensor::from_data(vec![2.0f32], vec![1], DeviceType::Cpu)?
    .requires_grad_(true);

// Forward pass: y = x^2
let y = x.pow(2.0)?;

// Backward pass: compute dy/dx = 2x
y.backward()?;

// Access gradient: should be 2 * 2.0 = 4.0
let grad = x.grad().expect("gradient should exist");
assert_eq!(grad.item()?, 4.0);
§Multi-Variable Function
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

// f(x, y) = x^2 + 2xy + y^2
let x = Tensor::from_data(vec![3.0f32], vec![1], DeviceType::Cpu)?
    .requires_grad_(true);
let y = Tensor::from_data(vec![4.0f32], vec![1], DeviceType::Cpu)?
    .requires_grad_(true);

// Forward pass
let x_squared = x.pow(2.0)?;
let xy = x.mul(&y)?;
let two_xy = xy.mul_scalar(2.0)?;
let y_squared = y.pow(2.0)?;
let result = x_squared.add(&two_xy)?.add(&y_squared)?;

// Backward pass
result.backward()?;

// df/dx = 2x + 2y = 6 + 8 = 14
// df/dy = 2x + 2y = 6 + 8 = 14
let grad_x = x.grad().expect("gradient should exist");
let grad_y = y.grad().expect("gradient should exist");
§Vector to Scalar (Loss Function)
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

// Create predictions and targets
let predictions = Tensor::from_data(
    vec![0.8, 0.6, 0.9],
    vec![3],
    DeviceType::Cpu
)?.requires_grad_(true);

let targets = Tensor::from_data(
    vec![1.0, 0.0, 1.0],
    vec![3],
    DeviceType::Cpu
)?;

// Compute MSE loss: mean((pred - target)^2)
let diff = predictions.sub(&targets)?;
let squared = diff.pow(2.0)?;
let loss = squared.mean()?; // Reduces to scalar

// Compute gradients
loss.backward()?;

// Gradient shows direction to reduce loss
let grad = predictions.grad().expect("gradient should exist");
§Gradient Accumulation Pattern
use torsh_tensor::Tensor;
use torsh_core::device::DeviceType;

let mut weights = Tensor::from_data(
    vec![0.5f32; 10],
    vec![10],
    DeviceType::Cpu
)?.requires_grad_(true);

// Accumulate gradients over multiple batches
for batch in 0..4 {
    // Forward pass for this batch
    let output = weights.mul_scalar(2.0)?;
    let loss = output.sum()?;

    // Backward pass - gradients accumulate
    loss.backward()?;

    // Don't zero gradients yet
}

// After accumulation, update weights
// optimizer.step();

// Clear gradients for next iteration
weights.zero_grad();
§Errors

Returns an error if:

  • The tensor doesn’t have requires_grad=true
  • The tensor is not a scalar (has more than 1 element)
  • An error occurs during gradient computation
§Common Pitfalls
§1. Forgetting to Create Scalar Output
let x = Tensor::from_data(vec![1.0, 2.0, 3.0], vec![3], DeviceType::Cpu)
    .expect("tensor creation should succeed")
    .requires_grad_(true);
let y = x.pow(2.0).expect("pow should succeed");

// ERROR: y has 3 elements, not a scalar!
y.backward().expect("backward pass should succeed");  // This will panic

Solution: Use reduction operations:

let y = x.pow(2.0)?.sum()?;  // Reduce to scalar
y.backward()?;
§2. Forgetting to Enable Gradient Tracking
let x = Tensor::from_data(vec![2.0], vec![1], DeviceType::Cpu).expect("tensor creation should succeed");
// Forgot .requires_grad_(true)
let y = x.pow(2.0).expect("pow should succeed");
y.backward().expect("backward pass should succeed");  // ERROR: requires_grad not set
§3. Not Zeroing Gradients Between Training Steps
let mut x = Tensor::from_data(vec![1.0], vec![1], DeviceType::Cpu)?
    .requires_grad_(true);

for epoch in 0..3 {
    let y = x.pow(2.0)?.sum()?;
    y.backward()?;

    // Gradients accumulate without this!
    x.zero_grad();  // IMPORTANT: Clear gradients
}
§See Also
Source

pub fn backward_with_grad( &self, _gradient: Option<&Tensor<T>>, ) -> Result<(), TorshError>
where T: FloatElement + Copy + Default + Add<Output = T> + Sub<Output = T> + Mul<Output = T> + Div<Output = T> + Clone + Debug, f32: From<T>,

Backward pass with gradient - integrated with autograd system

Source§

impl<T> Tensor<T>

Comparison operations for tensors

Source

pub fn gt(&self, other: &Tensor<T>) -> Result<Tensor<bool>, TorshError>

Element-wise greater than comparison

Source

pub fn lt(&self, other: &Tensor<T>) -> Result<Tensor<bool>, TorshError>

Element-wise less than comparison

Source

pub fn ge(&self, other: &Tensor<T>) -> Result<Tensor<bool>, TorshError>

Element-wise greater than or equal comparison

Source

pub fn le(&self, other: &Tensor<T>) -> Result<Tensor<bool>, TorshError>

Element-wise less than or equal comparison

Source

pub fn eq(&self, other: &Tensor<T>) -> Result<Tensor<bool>, TorshError>

Element-wise equality comparison

Source

pub fn ne(&self, other: &Tensor<T>) -> Result<Tensor<bool>, TorshError>

Element-wise inequality comparison

Source

pub fn eq_scalar(&self, value: T) -> Result<Tensor<bool>, TorshError>
where T: PartialEq + Copy,

Element-wise equality comparison with scalar

Source

pub fn ne_scalar(&self, value: T) -> Result<Tensor<bool>, TorshError>
where T: PartialEq + Copy,

Element-wise inequality comparison with scalar

Source

pub fn gt_scalar(&self, value: T) -> Result<Tensor<bool>, TorshError>
where T: PartialOrd + Copy,

Element-wise greater than comparison with scalar

Source

pub fn lt_scalar(&self, value: T) -> Result<Tensor<bool>, TorshError>
where T: PartialOrd + Copy,

Element-wise less than comparison with scalar

Source

pub fn le_scalar(&self, value: T) -> Result<Tensor<bool>, TorshError>
where T: PartialOrd + Copy,

Element-wise less than or equal comparison with scalar

Source

pub fn ge_scalar(&self, value: T) -> Result<Tensor<bool>, TorshError>
where T: PartialOrd + Copy,

Element-wise greater than or equal comparison with scalar

Source§

impl<T> Tensor<T>
where T: TensorElement,

Shape manipulation operations for tensors

Source

pub fn flatten(&self) -> Result<Tensor<T>, TorshError>

Flatten tensor to 1D

Source

pub fn where_tensor( &self, condition: &Tensor<bool>, other: &Tensor<T>, ) -> Result<Tensor<T>, TorshError>

Conditional tensor selection - where condition is true, select from self, otherwise from other

Source

pub fn add_bias(&self, bias: &Tensor<T>) -> Result<Tensor<T>, TorshError>
where T: Add<Output = T>,

Add bias vector to tensor (element-wise addition)

Source§

impl Tensor<bool>

Logical operations for boolean tensors

Source

pub fn logical_and( &self, other: &Tensor<bool>, ) -> Result<Tensor<bool>, TorshError>

Element-wise logical AND operation

Source

pub fn logical_or( &self, other: &Tensor<bool>, ) -> Result<Tensor<bool>, TorshError>

Element-wise logical OR operation

Source

pub fn logical_xor( &self, other: &Tensor<bool>, ) -> Result<Tensor<bool>, TorshError>

Element-wise logical XOR operation

Source§

impl<T> Tensor<T>
where T: TensorElement + Copy,

Source

pub fn from_scalar( value: T, shape: &[usize], device: DeviceType, ) -> Result<Tensor<T>, TorshError>
where T: Copy,

Create tensor from a scalar value repeated to fill the shape

Source

pub fn fill_(&mut self, value: T) -> Result<(), TorshError>
where T: Copy,

Fill tensor with a single value (in-place)

Source

pub fn zero_(&mut self) -> Result<(), TorshError>
where T: Copy,

Zero out the tensor (in-place)

Source

pub fn ones_(&mut self) -> Result<(), TorshError>
where T: Copy,

Fill with ones (in-place)

Source

pub fn copy_(&mut self, other: &Tensor<T>) -> Result<(), TorshError>
where T: Copy,

Copy data from another tensor (in-place)

Source

pub fn get_item(&self, indices: &[usize]) -> Result<T, TorshError>
where T: Copy,

Get an element by multi-dimensional index

Source

pub fn set_item( &mut self, indices: &[usize], value: T, ) -> Result<(), TorshError>
where T: Copy,

Set an element by multi-dimensional index

Source

pub fn get_item_flat(&self, index: usize) -> Result<T, TorshError>
where T: Copy,

Get element by flat index

Source

pub fn set_item_flat( &mut self, index: usize, value: T, ) -> Result<(), TorshError>
where T: Copy,

Set element by flat index

Source

pub fn multi_to_flat_index( &self, indices: &[usize], ) -> Result<usize, TorshError>

Convert multi-dimensional indices to flat index

Source

pub fn gather( &self, dim: usize, indices: &Tensor<i64>, ) -> Result<Tensor<T>, TorshError>

Gather values along an axis using indices

Source

pub fn scatter( &self, dim: usize, indices: &Tensor<i64>, src: &Tensor<T>, ) -> Result<Tensor<T>, TorshError>

Scatter values along an axis using indices

Source

pub fn scatter_add( &self, dim: usize, indices: &Tensor<i64>, src: &Tensor<T>, ) -> Result<Tensor<T>, TorshError>
where T: Add<Output = T>,

Scatter values along an axis using indices and add to existing values

§PyTorch Compatibility

Equivalent to torch.scatter_add(tensor, dim, index, src)

§Arguments
  • dim - Dimension along which to index
  • indices - Index tensor (same shape as src)
  • src - Source tensor containing values to add
§Examples
let tensor = Tensor::zeros(&[5], DeviceType::Cpu)?;
let indices = Tensor::from_data(vec![0i64, 1, 2, 0, 1], vec![5], DeviceType::Cpu)?;
let src = Tensor::from_data(vec![1.0f32, 2.0, 3.0, 4.0, 5.0], vec![5], DeviceType::Cpu)?;
let result = tensor.scatter_add(0, &indices, &src)?;
// result[0] = 0.0 + 1.0 + 4.0 = 5.0  (indices 0 and 3 both scatter to position 0)
// result[1] = 0.0 + 2.0 + 5.0 = 7.0  (indices 1 and 4 both scatter to position 1)
// result[2] = 0.0 + 3.0 = 3.0
Source

pub fn repeat(&self, repeats: &[usize]) -> Result<Tensor<T>, TorshError>

Repeat tensor along specified dimensions

Source

pub fn index_add( &self, dim: isize, index: &Tensor<i64>, source: &Tensor<T>, ) -> Result<Tensor<T>, TorshError>
where T: Add<Output = T>,

Add values to tensor at specified indices along a dimension

§PyTorch Compatibility

Equivalent to torch.index_add(tensor, dim, index, source, alpha=1.0)

§Arguments
  • dim - Dimension along which to index
  • index - 1D tensor containing indices
  • source - Source tensor to add
§Examples
let tensor = Tensor::zeros(&[3, 5], DeviceType::Cpu)?;
let index = Tensor::from_data(vec![0i64, 2], vec![2], DeviceType::Cpu)?;
let source = Tensor::ones(&[2, 5], DeviceType::Cpu)?;
let result = tensor.index_add(0, &index, &source)?;
Source

pub fn index_copy( &self, dim: isize, index: &Tensor<i64>, source: &Tensor<T>, ) -> Result<Tensor<T>, TorshError>

Copy values from source to tensor at specified indices along a dimension

§PyTorch Compatibility

Equivalent to torch.index_copy(tensor, dim, index, source)

§Arguments
  • dim - Dimension along which to index
  • index - 1D tensor containing indices
  • source - Source tensor to copy from
§Examples
let tensor = Tensor::zeros(&[3, 5], DeviceType::Cpu)?;
let index = Tensor::from_data(vec![0i64, 2], vec![2], DeviceType::Cpu)?;
let source = Tensor::ones(&[2, 5], DeviceType::Cpu)?;
let result = tensor.index_copy(0, &index, &source)?;
Source

pub fn index_fill( &self, dim: isize, index: &Tensor<i64>, value: T, ) -> Result<Tensor<T>, TorshError>

Fill values in tensor at specified indices along a dimension

§PyTorch Compatibility

Equivalent to torch.index_fill(tensor, dim, index, value)

§Arguments
  • dim - Dimension along which to index
  • index - 1D tensor containing indices
  • value - Scalar value to fill
§Examples
let tensor = Tensor::zeros(&[3, 5], DeviceType::Cpu)?;
let index = Tensor::from_data(vec![0i64, 2], vec![2], DeviceType::Cpu)?;
let result = tensor.index_fill(0, &index, 3.14)?;
Source

pub fn put_( &self, indices: &Tensor<i64>, values: &Tensor<T>, ) -> Result<Tensor<T>, TorshError>

Place values at specified flat indices (in-place-like operation, returns new tensor)

§PyTorch Compatibility

Equivalent to torch.put_(tensor, indices, values) but returns new tensor

§Arguments
  • indices - 1D tensor of flat indices
  • values - 1D tensor of values (must match indices length or be broadcastable)
§Examples
let tensor = Tensor::zeros(&[3, 3], DeviceType::Cpu)?;  // [[0,0,0],[0,0,0],[0,0,0]]
let indices = Tensor::from_data(vec![0i64, 4, 8], vec![3], DeviceType::Cpu)?;
let values = Tensor::from_data(vec![1.0f32, 2.0, 3.0], vec![3], DeviceType::Cpu)?;
let result = tensor.put_(&indices, &values)?;  // [[1,0,0],[0,2,0],[0,0,3]]
Source

pub fn masked_scatter( &self, mask: &Tensor<bool>, source: &Tensor<T>, ) -> Result<Tensor<T>, TorshError>

Scatter values from source tensor where mask is true (PyTorch-compatible)

Copies values from the source tensor to positions where the mask is true. The mask must have the same shape as self. Source values are taken sequentially and placed at positions where mask is true.

§PyTorch Compatibility

Equivalent to torch.masked_scatter(tensor, mask, source)

§Arguments
  • mask - Boolean tensor with same shape as self
  • source - Tensor containing values to scatter (must have at least as many elements as true values in mask)
§Examples
let tensor = Tensor::zeros(&[3, 3], DeviceType::Cpu)?;
let mask = Tensor::from_data(
    vec![true, false, false, false, true, false, false, false, true],
    vec![3, 3],
    DeviceType::Cpu
)?;
let source = Tensor::from_data(vec![1.0f32, 2.0, 3.0], vec![3], DeviceType::Cpu)?;
let result = tensor.masked_scatter(&mask, &source)?;  // [[1,0,0],[0,2,0],[0,0,3]]
Source

pub fn index_put( &self, indices: &[Tensor<i64>], values: &Tensor<T>, ) -> Result<Tensor<T>, TorshError>

Multi-dimensional indexed put operation (PyTorch-compatible)

Places values from source tensor at positions specified by index tensors. Each index tensor specifies indices along one dimension. Index tensors must be broadcastable to the same shape.

§PyTorch Compatibility

Equivalent to torch.index_put(tensor, indices, values) where indices is a tuple of index tensors

§Arguments
  • indices - Slice of index tensors, one per dimension to index
  • values - Tensor of values to place (must broadcast to indexed positions)
§Examples
// 2D example: index_put a 3x3 matrix with row=[0,1] col=[1,2]
let tensor = Tensor::zeros(&[3, 3], DeviceType::Cpu)?;
let row_idx = Tensor::from_data(vec![0i64, 1], vec![2], DeviceType::Cpu)?;
let col_idx = Tensor::from_data(vec![1i64, 2], vec![2], DeviceType::Cpu)?;
let values = Tensor::from_data(vec![10.0f32, 20.0], vec![2], DeviceType::Cpu)?;
let result = tensor.index_put(&[row_idx, col_idx], &values)?;
// result[0,1] = 10.0, result[1,2] = 20.0
Source

pub fn scatter_reduce( &self, dim: usize, indices: &Tensor<i64>, src: &Tensor<T>, reduce: &str, ) -> Result<Tensor<T>, TorshError>
where T: Add<Output = T> + Mul<Output = T> + Div<Output = T> + PartialOrd + FromPrimitive,

Scatter with reduction operation (PyTorch-compatible)

Generalized scatter operation that applies a reduction operation (sum, prod, mean, etc.) when scattering values to the same index position.

§PyTorch Compatibility

Equivalent to torch.scatter_reduce(tensor, dim, index, src, reduce)

§Arguments
  • dim - Dimension along which to scatter
  • indices - Index tensor specifying where to scatter values
  • src - Source tensor containing values to scatter
  • reduce - Reduction operation (“sum”, “prod”, “mean”, “amax”, “amin”)
§Examples
let tensor = Tensor::zeros(&[5], DeviceType::Cpu)?;
let indices = Tensor::from_data(vec![0i64, 1, 2, 0, 1], vec![5], DeviceType::Cpu)?;
let src = Tensor::from_data(vec![1.0f32, 2.0, 3.0, 4.0, 5.0], vec![5], DeviceType::Cpu)?;
let result = tensor.scatter_reduce(0, &indices, &src, "sum")?;
// result[0] = 1.0 + 4.0 = 5.0 (sum reduction)
// result[1] = 2.0 + 5.0 = 7.0
Source

pub fn diagonal_scatter( &self, src: &Tensor<T>, offset: isize, dim1: usize, dim2: usize, ) -> Result<Tensor<T>, TorshError>

Scatter values to the diagonal (PyTorch-compatible)

Embeds the values of src tensor into self along the diagonal elements, with respect to dim1 and dim2. The offset determines which diagonal to use.

§PyTorch Compatibility

Equivalent to torch.diagonal_scatter(tensor, src, offset, dim1, dim2)

§Arguments
  • src - Source tensor containing values for the diagonal
  • offset - Diagonal offset (0=main diagonal, >0=above, <0=below)
  • dim1 - First dimension (default: 0)
  • dim2 - Second dimension (default: 1)
§Examples
let tensor = Tensor::zeros(&[3, 3], DeviceType::Cpu)?;
let src = Tensor::from_data(vec![1.0f32, 2.0, 3.0], vec![3], DeviceType::Cpu)?;
let result = tensor.diagonal_scatter(&src, 0, 0, 1)?;
// result = [[1, 0, 0], [0, 2, 0], [0, 0, 3]]
Source

pub fn select_scatter( &self, src: &Tensor<T>, dim: isize, index: isize, ) -> Result<Tensor<T>, TorshError>

Scatter values to a selected slice along dimension (PyTorch-compatible)

Embeds the values of src tensor into self at the given index along dimension dim. This is the inverse of select() operation.

§PyTorch Compatibility

Equivalent to torch.select_scatter(tensor, src, dim, index)

§Arguments
  • src - Source tensor to scatter (shape should match self with dim removed)
  • dim - Dimension along which to select
  • index - Index position to scatter to
§Examples
let tensor = Tensor::zeros(&[3, 4, 5], DeviceType::Cpu)?;
let src = Tensor::ones(&[3, 5], DeviceType::Cpu)?; // dim=1 removed
let result = tensor.select_scatter(&src, 1, 2)?;
// result[:, 2, :] = src
Source

pub fn slice_scatter( &self, src: &Tensor<T>, dim: isize, start: Option<isize>, end: Option<isize>, step: usize, ) -> Result<Tensor<T>, TorshError>

Scatter values to a slice along dimension (PyTorch-compatible)

Embeds the values of src tensor into self along dimension dim, starting at start index, ending at end index, with the given step.

§PyTorch Compatibility

Equivalent to torch.slice_scatter(tensor, src, dim, start, end, step)

§Arguments
  • src - Source tensor to scatter
  • dim - Dimension along which to slice
  • start - Starting index (None means 0)
  • end - Ending index (None means size of dim)
  • step - Step size (default: 1)
§Examples
let tensor = Tensor::zeros(&[5, 5], DeviceType::Cpu)?;
let src = Tensor::ones(&[2, 5], DeviceType::Cpu)?;
let result = tensor.slice_scatter(&src, 0, Some(1), Some(3), 1)?;
// result[1:3, :] = src
Source§

impl<T> Tensor<T>
where T: TensorElement + Copy + Default,

Source

pub fn stack(tensors: &[Tensor<T>], dim: isize) -> Result<Tensor<T>, TorshError>

Stack tensors along a new dimension

§PyTorch Compatibility

Equivalent to torch.stack(tensors, dim)

§Arguments
  • tensors - Sequence of tensors to stack
  • dim - Dimension along which to stack
§Examples
let a = Tensor::from_data(vec![1.0, 2.0], vec![2], DeviceType::Cpu)?;
let b = Tensor::from_data(vec![3.0, 4.0], vec![2], DeviceType::Cpu)?;
let result = Tensor::stack(&[a, b], 0)?; // shape: [2, 2]
Source

pub fn chunk( &self, chunks: usize, dim: isize, ) -> Result<Vec<Tensor<T>>, TorshError>

Split tensor into chunks

§PyTorch Compatibility

Equivalent to torch.chunk(tensor, chunks, dim)

Source

pub fn split( &self, split_size: usize, dim: isize, ) -> Result<Vec<Tensor<T>>, TorshError>

Split tensor into parts of given size

§PyTorch Compatibility

Equivalent to torch.split(tensor, split_size, dim)

Source

pub fn flip(&self, dims: &[isize]) -> Result<Tensor<T>, TorshError>

Flip tensor along given dimensions

§PyTorch Compatibility

Equivalent to torch.flip(tensor, dims)

Source

pub fn fliplr(&self) -> Result<Tensor<T>, TorshError>

Flip tensor left-right (last dimension)

§PyTorch Compatibility

Equivalent to torch.fliplr(tensor)

Source

pub fn flipud(&self) -> Result<Tensor<T>, TorshError>

Flip tensor up-down (first dimension)

§PyTorch Compatibility

Equivalent to torch.flipud(tensor)

Source

pub fn roll( &self, shifts: &[isize], dims: &[isize], ) -> Result<Tensor<T>, TorshError>

Roll tensor elements along given dimensions

§PyTorch Compatibility

Equivalent to torch.roll(tensor, shifts, dims)

Source

pub fn rot90(&self, k: isize, dims: &[isize]) -> Result<Tensor<T>, TorshError>

Rotate tensor 90 degrees

§PyTorch Compatibility

Equivalent to torch.rot90(tensor, k, dims)

Source

pub fn tile(&self, repeats: &[usize]) -> Result<Tensor<T>, TorshError>

Tile tensor by repeating

§PyTorch Compatibility

Equivalent to torch.tile(tensor, repeats)

Source

pub fn repeat_interleave( &self, repeats: usize, dim: Option<isize>, ) -> Result<Tensor<T>, TorshError>

Repeat elements of a tensor along a dimension

§PyTorch Compatibility

Equivalent to torch.repeat_interleave(tensor, repeats, dim)

§Arguments
  • repeats - Number of times to repeat each element
  • dim - Dimension along which to repeat (None = flatten first)
§Examples
let x = Tensor::from_data(vec![1.0, 2.0, 3.0], vec![3], DeviceType::Cpu)?;
let y = x.repeat_interleave(2, None)?; // [1.0, 1.0, 2.0, 2.0, 3.0, 3.0]
Source

pub fn unflatten( &self, dim: isize, sizes: &[usize], ) -> Result<Tensor<T>, TorshError>

Unflatten a dimension into multiple dimensions

§PyTorch Compatibility

Equivalent to torch.unflatten(tensor, dim, sizes)

§Arguments
  • dim - Dimension to unflatten
  • sizes - Target sizes for the unflattened dimensions
§Examples
let x = Tensor::from_data(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], vec![6], DeviceType::Cpu)?;
let y = x.unflatten(0, &[2, 3])?; // Shape becomes [2, 3]
Source

pub fn take_along_dim( &self, indices: &Tensor<i64>, dim: Option<isize>, ) -> Result<Tensor<T>, TorshError>

Gather values along a dimension using indices

§PyTorch Compatibility

Equivalent to torch.take_along_dim(tensor, indices, dim)

§Arguments
  • indices - Indices to gather
  • dim - Dimension along which to gather (None = flatten first)
§Examples
let x = Tensor::from_data(vec![1.0, 2.0, 3.0, 4.0], vec![4], DeviceType::Cpu)?;
let indices = Tensor::from_data(vec![0i64, 2], vec![2], DeviceType::Cpu)?;
let y = x.take_along_dim(&indices, None)?; // [1.0, 3.0]
Source§

impl<T> Tensor<T>
where T: TensorElement + Copy,

Source

pub fn add_scalar_(&mut self, scalar: T) -> Result<(), TorshError>
where T: Copy + Add<Output = T>,

Add scalar to all elements in-place

Source

pub fn add_scalar(&self, scalar: T) -> Result<Tensor<T>, TorshError>
where T: Copy + Add<Output = T>,

Add scalar to all elements (returns new tensor)

Source

pub fn sub_scalar_(&mut self, scalar: T) -> Result<(), TorshError>
where T: Copy + Sub<Output = T>,

Subtract scalar from all elements in-place

Source

pub fn sub_scalar(&self, scalar: T) -> Result<Tensor<T>, TorshError>
where T: Copy + Sub<Output = T>,

Subtract scalar from all elements (returns new tensor)

Source

pub fn mul_scalar_(&mut self, scalar: T) -> Result<(), TorshError>
where T: Copy + Mul<Output = T>,

Multiply all elements by scalar in-place

Source

pub fn mul_scalar(&self, scalar: T) -> Result<Tensor<T>, TorshError>
where T: Copy + Mul<Output = T>,

Multiply all elements by scalar (returns new tensor)

Source

pub fn div_scalar_(&mut self, scalar: T) -> Result<(), TorshError>
where T: Copy + Div<Output = T>,

Divide all elements by scalar in-place

Source

pub fn div_scalar(&self, scalar: T) -> Result<Tensor<T>, TorshError>
where T: Copy + Div<Output = T>,

Divide all elements by scalar (returns new tensor)

Source

pub fn add(&self, other: &Tensor<T>) -> Result<Tensor<T>, TorshError>
where T: Add<Output = T>,

Element-wise addition with another tensor (supports broadcasting)

Source

pub fn sub(&self, other: &Tensor<T>) -> Result<Tensor<T>, TorshError>
where T: Sub<Output = T>,

Element-wise subtraction with another tensor

Source

pub fn mul(&self, other: &Tensor<T>) -> Result<Tensor<T>, TorshError>
where T: Mul<Output = T>,

Element-wise multiplication with another tensor

Source

pub fn div(&self, other: &Tensor<T>) -> Result<Tensor<T>, TorshError>
where T: Div<Output = T>,

Element-wise division with another tensor

Source§

impl<T> Tensor<T>
where T: TensorElement + Copy,

Source

pub fn add_(&mut self, other: &Tensor<T>) -> Result<&mut Tensor<T>, TorshError>
where T: Add<Output = T>,

Element-wise in-place addition: self += other.

For f32 tensors with ≥ 1024 elements and matching shapes, this routes through the SIMD-backed simd_ops_f32::add_assign_f32 without any additional allocation.

§Errors

Returns an error if requires_grad is true (autograd cannot be tracked through in-place mutations).

Source

pub fn sub_(&mut self, other: &Tensor<T>) -> Result<&mut Tensor<T>, TorshError>
where T: Sub<Output = T>,

Element-wise in-place subtraction: self -= other.

For f32 tensors with ≥ 1024 elements and matching shapes, this routes through simd_ops_f32::sub_assign_f32.

Source

pub fn mul_(&mut self, other: &Tensor<T>) -> Result<&mut Tensor<T>, TorshError>
where T: Mul<Output = T>,

Element-wise in-place multiplication: self *= other.

For f32 tensors with ≥ 1024 elements and matching shapes, this routes through simd_ops_f32::mul_assign_f32.

Source

pub fn div_(&mut self, other: &Tensor<T>) -> Result<&mut Tensor<T>, TorshError>
where T: Div<Output = T>,

Element-wise in-place division: self /= other.

For f32 tensors with ≥ 1024 elements and matching shapes, this routes through simd_ops_f32::div_assign_f32.

Source§

impl<T> Tensor<T>

Source

pub fn sqrt(&self) -> Result<Tensor<T>, TorshError>

Square root of all elements

Source

pub fn square(&self) -> Result<Tensor<T>, TorshError>

Square of all elements

Source

pub fn rsqrt(&self) -> Result<Tensor<T>, TorshError>

Reciprocal square root of all elements (1/sqrt(x))

Source

pub fn reciprocal(&self) -> Result<Tensor<T>, TorshError>

Reciprocal of all elements (1/x)

Source

pub fn exp(&self) -> Result<Tensor<T>, TorshError>

Exponential of all elements

Source

pub fn ln(&self) -> Result<Tensor<T>, TorshError>

Natural logarithm of all elements

Source

pub fn log10(&self) -> Result<Tensor<T>, TorshError>

Logarithm base 10 of all elements

Source

pub fn log2(&self) -> Result<Tensor<T>, TorshError>

Logarithm base 2 of all elements

Source

pub fn log(&self) -> Result<Tensor<T>, TorshError>

Natural logarithm of all elements

Source

pub fn sin(&self) -> Result<Tensor<T>, TorshError>

Sine of all elements

Source

pub fn cos(&self) -> Result<Tensor<T>, TorshError>

Cosine of all elements

Source

pub fn tan(&self) -> Result<Tensor<T>, TorshError>

Tangent of all elements

Source

pub fn gelu(&self) -> Result<Tensor<T>, TorshError>

GELU (Gaussian Error Linear Unit) activation function with GPU and SIMD optimization

Source

pub fn leaky_relu(&self, negative_slope: T) -> Result<Tensor<T>, TorshError>

Leaky ReLU activation function with negative slope

Source

pub fn asin(&self) -> Result<Tensor<T>, TorshError>

Arcsine of all elements

Source

pub fn acos(&self) -> Result<Tensor<T>, TorshError>

Arccosine of all elements

Source

pub fn atan(&self) -> Result<Tensor<T>, TorshError>

Arctangent of all elements

Source

pub fn sinh(&self) -> Result<Tensor<T>, TorshError>

Hyperbolic sine of all elements

Source

pub fn cosh(&self) -> Result<Tensor<T>, TorshError>

Hyperbolic cosine of all elements

Source

pub fn tanh(&self) -> Result<Tensor<T>, TorshError>

Hyperbolic tangent of all elements

Source

pub fn pow(&self, exponent: T) -> Result<Tensor<T>, TorshError>
where T: TensorElement + Into<f32>,

Power function (element-wise)

Source

pub fn pow_scalar(&self, exponent: T) -> Result<Tensor<T>, TorshError>
where T: TensorElement + Into<f32>,

Power function with scalar exponent (alias for pow)

Source

pub fn pow_tensor(&self, exponent: &Tensor<T>) -> Result<Tensor<T>, TorshError>

Power function with tensor exponents

Source

pub fn floor(&self) -> Result<Tensor<T>, TorshError>

Floor of all elements

Source

pub fn ceil(&self) -> Result<Tensor<T>, TorshError>

Ceiling of all elements

Source

pub fn round(&self) -> Result<Tensor<T>, TorshError>

Round to nearest integer

Source

pub fn trunc(&self) -> Result<Tensor<T>, TorshError>

Truncate to integer part

Source

pub fn fract(&self) -> Result<Tensor<T>, TorshError>

Fractional part

Source

pub fn neg(&self) -> Result<Tensor<T>, TorshError>
where T: Neg<Output = T>,

Negation of all elements

Source

pub fn sign(&self) -> Result<Tensor<T>, TorshError>

Sign of all elements (-1, 0, or 1)

Source§

impl<T> Tensor<T>
where T: TensorElement + Copy,

Source

pub fn add_op(&self, other: &Tensor<T>) -> Result<Tensor<T>, TorshError>
where T: Add<Output = T>,

Add operation (used by autograd backward pass)

Source

pub fn mul_op(&self, other: &Tensor<T>) -> Result<Tensor<T>, TorshError>
where T: Mul<Output = T>,

Multiply operation (used by autograd backward pass)

Source

pub fn sigmoid(&self) -> Result<Tensor<T>, TorshError>
where T: FloatElement,

Sigmoid activation function with SIMD optimization

Source

pub fn relu(&self) -> Result<Tensor<T>, TorshError>
where T: PartialOrd + Zero,

ReLU activation function (Rectified Linear Unit) with SIMD optimization

Source

pub fn minimum(&self, other: &Tensor<T>) -> Result<Tensor<T>, TorshError>
where T: PartialOrd,

Element-wise minimum with another tensor

Source

pub fn maximum(&self, other: &Tensor<T>) -> Result<Tensor<T>, TorshError>
where T: PartialOrd,

Element-wise maximum with another tensor

Source

pub fn clamp(&self, min: T, max: T) -> Result<Tensor<T>, TorshError>
where T: PartialOrd + Copy,

Clamp tensor values between min and max bounds

Source

pub fn dot(&self, other: &Tensor<T>) -> Result<Tensor<T>, TorshError>
where T: Mul<Output = T> + Add<Output = T> + Zero,

Dot product with another tensor (for 1D tensors)

Source§

impl<T> Tensor<T>

Source

pub fn add_scirs2(&self, other: &Tensor<T>) -> Result<Tensor<T>, TorshError>
where T: Add<Output = T> + Float,

Use SciRS2 backend for optimized tensor addition

Source

pub fn mul_scirs2(&self, other: &Tensor<T>) -> Result<Tensor<T>, TorshError>
where T: Mul<Output = T> + Float,

Use SciRS2 backend for optimized tensor multiplication

Source

pub fn sub_scirs2(&self, other: &Tensor<T>) -> Result<Tensor<T>, TorshError>
where T: Sub<Output = T> + Float,

Use SciRS2 backend for optimized tensor subtraction

Source

pub fn div_scirs2(&self, other: &Tensor<T>) -> Result<Tensor<T>, TorshError>
where T: Div<Output = T> + Float,

Use SciRS2 backend for optimized tensor division

Source§

impl<T> Tensor<T>
where T: TensorElement + Copy + Float,

Source

pub fn add_simd(&self, other: &Tensor<T>) -> Result<Tensor<T>, TorshError>
where T: SimdUnifiedOps,

Element-wise addition with SIMD acceleration (SciRS2)

Uses real hardware SIMD instructions (AVX2/NEON) via scirs2_core::simd_ops::SimdUnifiedOps

Source

pub fn mul_simd(&self, other: &Tensor<T>) -> Result<Tensor<T>, TorshError>
where T: SimdUnifiedOps,

Element-wise multiplication with SIMD acceleration (SciRS2)

Uses real hardware SIMD instructions (AVX2/NEON) via scirs2_core::simd_ops::SimdUnifiedOps

Source

pub fn dot_simd(&self, other: &Tensor<T>) -> Result<T, TorshError>
where T: SimdUnifiedOps,

Dot product with SIMD acceleration (SciRS2)

Uses real hardware SIMD instructions (AVX2/NEON) via scirs2_core::simd_ops::SimdUnifiedOps

Returns a scalar value (sum of element-wise products)

Source

pub fn reduce_memory_efficient<F>(&self, func: F) -> Result<T, TorshError>
where F: Fn(T, T) -> T + Send + Sync,

Memory-efficient reduction using SciRS2 intelligent chunking and lazy evaluation

Source§

impl<T> Tensor<T>
where T: TensorElement + Copy + Mul<Output = T>,

Source

pub fn relu_(&mut self) -> Result<&mut Tensor<T>, TorshError>
where T: PartialOrd + Zero,

In-place ReLU activation: self = max(0, self)

§PyTorch Compatibility

Equivalent to PyTorch’s tensor.relu_()

§Errors
  • Returns error if requires_grad is true
Source

pub fn sigmoid_(&mut self) -> Result<&mut Tensor<T>, TorshError>
where T: FloatElement,

In-place sigmoid activation: self = 1 / (1 + exp(-self))

§PyTorch Compatibility

Equivalent to PyTorch’s tensor.sigmoid_()

Source

pub fn tanh_(&mut self) -> Result<&mut Tensor<T>, TorshError>
where T: FloatElement,

In-place tanh activation: self = tanh(self)

§PyTorch Compatibility

Equivalent to PyTorch’s tensor.tanh_()

Source

pub fn gelu_(&mut self) -> Result<&mut Tensor<T>, TorshError>
where T: FloatElement,

In-place GELU activation

§PyTorch Compatibility

Equivalent to PyTorch’s tensor.gelu_()

Source

pub fn leaky_relu_( &mut self, negative_slope: T, ) -> Result<&mut Tensor<T>, TorshError>
where T: PartialOrd + Zero,

In-place leaky ReLU activation

§PyTorch Compatibility

Equivalent to PyTorch’s tensor.leaky_relu_(negative_slope)

Source

pub fn clamp_(&mut self, min: T, max: T) -> Result<&mut Tensor<T>, TorshError>
where T: PartialOrd,

In-place clamp operation: self = clamp(self, min, max)

§PyTorch Compatibility

Equivalent to PyTorch’s tensor.clamp_(min, max)

Source§

impl<T> Tensor<T>
where T: TensorElement + Copy,

Source

pub fn size(&self, dim: i32) -> Result<usize, TorshError>

Get size of a specific dimension

Source

pub fn view(&self, shape: &[i32]) -> Result<Tensor<T>, TorshError>

Reshapes the tensor to a new shape (creates a view or copy if needed).

This is equivalent to PyTorch’s view() operation. The total number of elements must remain the same. You can use -1 for one dimension to have it inferred automatically.

§Arguments
  • shape - The new shape as a slice of dimensions. Use -1 to infer one dimension.
§Returns

A reshaped tensor, or an error if the reshape is invalid.

§Examples
use torsh_tensor::creation::zeros;

// Reshape a 1D tensor to 2D
let t = zeros::<f32>(&[6]).expect("tensor creation should succeed");
let reshaped = t.view(&[2, 3]).expect("view should succeed");
assert_eq!(reshaped.shape().dims(), &[2, 3]);

// Use -1 to infer a dimension
let t2 = zeros::<f32>(&[12]).expect("tensor creation should succeed");
let auto = t2.view(&[-1, 4]).expect("view should succeed");  // Infers 3 for first dimension
assert_eq!(auto.shape().dims(), &[3, 4]);

// Flatten to 1D
let matrix = zeros::<f32>(&[3, 4, 5]).expect("tensor creation should succeed");
let flat = matrix.view(&[-1]).expect("view should succeed");
assert_eq!(flat.shape().dims(), &[60]);
§Errors

Returns an error if:

  • More than one dimension is -1
  • The total number of elements doesn’t match
  • Any dimension would overflow
§See Also
Source

pub fn view_as(&self, shape: &[usize]) -> Result<Tensor<T>, TorshError>

Create an efficient view with different shape (shares data, no copying). This is the zero-copy version of view() for compatible shapes.

Source

pub fn slice_tensor( &self, dim: usize, start: usize, end: usize, ) -> Result<Tensor<T>, TorshError>

Create a view of a slice along a dimension (shares data, no copying)

Source

pub fn transpose_view( &self, dim0: usize, dim1: usize, ) -> Result<Tensor<T>, TorshError>

Create a transposed view (shares data, no copying)

Source

pub fn squeeze_tensor(&self, dim: usize) -> Result<Tensor<T>, TorshError>

Squeeze a tensor along a specific dimension (removes dimension of size 1)

Source

pub fn unsqueeze_tensor(&self, dim: usize) -> Result<Tensor<T>, TorshError>

Unsqueeze a tensor at a specific dimension (adds dimension of size 1)

Source

pub fn transpose(&self, dim0: i32, dim1: i32) -> Result<Tensor<T>, TorshError>

Transposes two dimensions of the tensor.

Swaps the specified dimensions, creating a new tensor. For 2D tensors, calling transpose(0, 1) produces the standard matrix transpose operation.

§Arguments
  • dim0 - The first dimension to swap. Negative values count from the end.
  • dim1 - The second dimension to swap. Negative values count from the end.
§Returns

A tensor with the specified dimensions transposed.

§Examples
use torsh_tensor::creation::{zeros, arange};

// Standard matrix transpose
let matrix = zeros::<f32>(&[3, 4]).expect("tensor creation should succeed");
let transposed = matrix.transpose(0, 1).expect("transpose should succeed");
assert_eq!(transposed.shape().dims(), &[4, 3]);

// Transpose in 3D tensor
let cube = zeros::<f32>(&[2, 3, 4]).expect("tensor creation should succeed");
let swapped = cube.transpose(0, 2).expect("transpose should succeed");
assert_eq!(swapped.shape().dims(), &[4, 3, 2]);

// Use negative indexing
let t = zeros::<f32>(&[5, 6, 7]).expect("tensor creation should succeed");
let result = t.transpose(-2, -1).expect("transpose should succeed");
assert_eq!(result.shape().dims(), &[5, 7, 6]);

// Practical use: convert between row-major and column-major
let data = arange(0, 12, 1).expect("arange should succeed");
let row_major = data.reshape(&[3, 4]).expect("reshape should succeed");
let col_major = row_major.transpose(0, 1).expect("transpose should succeed");
§See Also
Source

pub fn permute(&self, dims: &[i32]) -> Result<Tensor<T>, TorshError>

Permute dimensions according to the given order

Source

pub fn squeeze(&self, dim: i32) -> Result<Tensor<T>, TorshError>

Removes a dimension of size 1 at the specified position.

This operation reduces the dimensionality of the tensor by removing dimensions that have size 1. Commonly used to remove singleton dimensions after reductions or to match tensor shapes for operations.

§Arguments
  • dim - The dimension to squeeze. Negative values count from the end.
§Returns

A tensor with the specified dimension removed, or an error if the dimension doesn’t have size 1.

§Examples
use torsh_tensor::creation::zeros;

// Remove a singleton dimension
let t = zeros::<f32>(&[3, 1, 4]).expect("tensor creation should succeed");
let squeezed = t.squeeze(1).expect("squeeze should succeed");
assert_eq!(squeezed.shape().dims(), &[3, 4]);

// Use negative indexing
let t2 = zeros::<f32>(&[2, 3, 1]).expect("tensor creation should succeed");
let squeezed2 = t2.squeeze(-1).expect("squeeze should succeed");
assert_eq!(squeezed2.shape().dims(), &[2, 3]);

// After a reduction operation
let matrix = zeros::<f32>(&[5, 10]).expect("tensor creation should succeed");
let reduced = matrix.sum_dim(&[1], true).expect("sum_dim should succeed");  // Shape: [5, 1]
let final_result = reduced.squeeze(1).expect("squeeze should succeed");  // Shape: [5]
§See Also
Source

pub fn squeeze_all(&self) -> Result<Tensor<T>, TorshError>

Squeeze all dimensions with size 1

Source

pub fn unsqueeze(&self, dim: i32) -> Result<Tensor<T>, TorshError>

Adds a dimension of size 1 at the specified position.

This operation increases the dimensionality of the tensor by inserting a new dimension of size 1. Commonly used to add batch dimensions or to match tensor shapes for broadcasting operations.

§Arguments
  • dim - The position to insert the new dimension. Negative values count from the end.
§Returns

A tensor with an additional dimension of size 1 inserted.

§Examples
use torsh_tensor::creation::zeros;

// Add a batch dimension at the beginning
let t = zeros::<f32>(&[3, 4]).expect("tensor creation should succeed");
let batched = t.unsqueeze(0).expect("unsqueeze should succeed");
assert_eq!(batched.shape().dims(), &[1, 3, 4]);

// Add a dimension at the end
let t2 = zeros::<f32>(&[5]).expect("tensor creation should succeed");
let expanded = t2.unsqueeze(-1).expect("unsqueeze should succeed");
assert_eq!(expanded.shape().dims(), &[5, 1]);

// Prepare for broadcasting
let weights = zeros::<f32>(&[64]).expect("tensor creation should succeed");
let weights_2d = weights.unsqueeze(0).expect("unsqueeze should succeed");  // Shape: [1, 64]
// Now can broadcast with shape [batch_size, 64]
§See Also
Source

pub fn reshape(&self, shape: &[i32]) -> Result<Tensor<T>, TorshError>

Reshapes the tensor to a new shape.

This is an alias for view() and provides the same functionality. The total number of elements must remain the same.

§Arguments
  • shape - The new shape as a slice of dimensions. Use -1 to infer one dimension.
§Returns

A reshaped tensor, or an error if the reshape is invalid.

§Examples
use torsh_tensor::creation::arange;

// Reshape a sequence to a matrix
let t = arange(0, 12, 1).expect("arange should succeed");
let matrix = t.reshape(&[3, 4]).expect("reshape should succeed");
assert_eq!(matrix.shape().dims(), &[3, 4]);

// Reshape with automatic dimension inference
let cube = t.reshape(&[2, -1, 3]).expect("reshape should succeed");  // Infers 2 for middle dimension
assert_eq!(cube.shape().dims(), &[2, 2, 3]);
§See Also
Source

pub fn is_contiguous(&self) -> bool

Check if tensor is contiguous in memory

Source

pub fn contiguous(&self) -> Result<Tensor<T>, TorshError>

Make tensor contiguous if it isn’t already

Source

pub fn expand(&self, shape: &[usize]) -> Result<Tensor<T>, TorshError>

Expand tensor to a larger size

Source

pub fn movedim( &self, source: &[isize], destination: &[isize], ) -> Result<Tensor<T>, TorshError>

Move dimensions from source positions to destination positions

§PyTorch Compatibility

Equivalent to torch.movedim(tensor, source, destination)

§Arguments
  • source - Original positions of dimensions to move
  • destination - Target positions for the dimensions
§Examples
let x = Tensor::from_data(vec![1.0; 24], vec![2, 3, 4], DeviceType::Cpu)?;
let y = x.movedim(&[0, 1], &[2, 0])?; // [2,3,4] -> [3,4,2]
Source

pub fn moveaxis( &self, source: &[isize], destination: &[isize], ) -> Result<Tensor<T>, TorshError>

Move axis from source position to destination position (alias for movedim)

§PyTorch Compatibility

Equivalent to torch.moveaxis(tensor, source, destination)

§Arguments
  • source - Original positions of axes to move
  • destination - Target positions for the axes
Source

pub fn swapaxes( &self, axis0: isize, axis1: isize, ) -> Result<Tensor<T>, TorshError>

Swap two dimensions

§PyTorch Compatibility

Equivalent to torch.swapaxes(tensor, axis0, axis1) or torch.swapdims(tensor, dim0, dim1)

§Arguments
  • axis0 - First dimension
  • axis1 - Second dimension
§Examples
let x = Tensor::from_data(vec![1.0; 12], vec![2, 3, 2], DeviceType::Cpu)?;
let y = x.swapaxes(0, 2)?; // dims 0 and 2 swapped; resulting shape is still [2,3,2] because both dimensions have size 2
Source

pub fn swapdims( &self, dim0: isize, dim1: isize, ) -> Result<Tensor<T>, TorshError>

Swap two dimensions (alias for swapaxes)

§PyTorch Compatibility

Equivalent to torch.swapdims(tensor, dim0, dim1)

Source

pub fn broadcast_to(&self, shape: &[usize]) -> Result<Tensor<T>, TorshError>

Broadcast tensor to a new shape

§PyTorch Compatibility

Equivalent to torch.broadcast_to(tensor, shape)

§Arguments
  • shape - Target shape for broadcasting
§Examples
let x = Tensor::from_data(vec![1.0, 2.0], vec![2], DeviceType::Cpu)?;
let y = x.broadcast_to(&[3, 2])?; // Broadcast [2] to [3, 2]
Source

pub fn expand_as(&self, other: &Tensor<T>) -> Result<Tensor<T>, TorshError>

Expand tensor to match another tensor’s shape

§PyTorch Compatibility

Equivalent to torch.expand_as(tensor, other)

§Arguments
  • other - Target tensor whose shape to match
§Examples
let x = Tensor::from_data(vec![1.0, 2.0], vec![2], DeviceType::Cpu)?;
let y = Tensor::from_data(vec![0.0; 6], vec![3, 2], DeviceType::Cpu)?;
let z = x.expand_as(&y)?; // Expand x to match y's shape [3, 2]
Source§

impl<T> Tensor<T>
where T: TensorElement + Copy,

Source

pub fn to_device( &self, target_device: DeviceType, ) -> Result<Tensor<T>, TorshError>

Transfer tensor to another device with optimization

Source

pub fn synchronize_devices( &self, devices: &[DeviceType], ) -> Result<(), TorshError>

Synchronize operations across devices

Source

pub fn can_transfer_efficiently(&self, target_device: DeviceType) -> bool

Check if tensor can be efficiently transferred to target device

Source

pub fn get_transfer_strategy( &self, target_device: DeviceType, ) -> TransferStrategy

Get optimal transfer strategy for device pair

Source§

impl Tensor<bf16>

Specialized bf16 arithmetic operations with proper rounding

Source

pub fn add_with_rounding( &self, other: &Tensor<bf16>, mode: BF16RoundingMode, ) -> Result<Tensor<bf16>, TorshError>

Add two bf16 tensors with specified rounding mode

Source

pub fn mul_with_rounding( &self, other: &Tensor<bf16>, mode: BF16RoundingMode, ) -> Result<Tensor<bf16>, TorshError>

Multiply two bf16 tensors with specified rounding mode

Source

pub fn fma_with_rounding( &self, other: &Tensor<bf16>, addend: &Tensor<bf16>, mode: BF16RoundingMode, ) -> Result<Tensor<bf16>, TorshError>

Fused multiply-add with proper bf16 rounding

Source§

impl<T> Tensor<T>
where T: TensorElement + Copy,

Source

pub fn optimize_cache_layout(&mut self) -> Result<(), TorshError>

Memory layout optimization for cache efficiency. Analyzes and optimizes the tensor’s memory layout to improve cache performance.

Source

pub fn analyze_cache_performance(&self) -> CacheAnalysisReport

Analyze memory access patterns and provide optimization recommendations

Source

pub fn to_cache_optimized(&self) -> Result<Tensor<T>, TorshError>

Create a cache-optimized copy of the tensor

Source

pub fn memory_stats(&self) -> MemoryStats

Get memory usage statistics for the tensor

Source§

impl<T> Tensor<T>
where T: TensorElement + Copy + Default,

Source

pub fn optimize_memory_layout( &mut self, numa_hint: Option<NumaAllocationHint>, ) -> Result<(), TorshError>

Advanced memory optimization with NUMA awareness

Source

pub fn create_memory_mapped_optimized( data: Vec<T>, shape: Vec<usize>, numa_hint: Option<NumaAllocationHint>, ) -> Result<Tensor<T>, TorshError>

Memory-mapped tensor creation with optimization hints

Source

pub fn prefetch_data(&self) -> Result<(), TorshError>

Prefetch memory pages for better performance

Source§

impl<T> Tensor<T>
where T: FloatElement,

Source

pub fn conv1d( &self, weight: &Tensor<T>, bias: Option<&Tensor<T>>, stride: usize, padding: usize, dilation: usize, groups: usize, ) -> Result<Tensor<T>, TorshError>

1D convolution operation

Source

pub fn conv2d( &self, weight: &Tensor<T>, bias: Option<&Tensor<T>>, stride: (usize, usize), padding: (usize, usize), dilation: (usize, usize), groups: usize, ) -> Result<Tensor<T>, TorshError>

2D convolution operation

Source

pub fn conv3d( &self, weight: &Tensor<T>, bias: Option<&Tensor<T>>, stride: (usize, usize, usize), padding: (usize, usize, usize), dilation: (usize, usize, usize), groups: usize, ) -> Result<Tensor<T>, TorshError>

3D convolution operation

Source

pub fn depthwise_conv2d( &self, weight: &Tensor<T>, bias: Option<&Tensor<T>>, stride: (usize, usize), padding: (usize, usize), dilation: (usize, usize), ) -> Result<Tensor<T>, TorshError>

Depthwise 2D convolution operation. Each input channel is convolved with its own kernel independently.

Source

pub fn separable_conv2d( &self, depthwise_weight: &Tensor<T>, pointwise_weight: &Tensor<T>, bias: Option<&Tensor<T>>, stride: (usize, usize), padding: (usize, usize), dilation: (usize, usize), ) -> Result<Tensor<T>, TorshError>

Separable 2D convolution operation. Factorized into depthwise convolution followed by pointwise (1x1) convolution.

Source

pub fn conv_transpose2d( &self, weight: &Tensor<T>, bias: Option<&Tensor<T>>, stride: (usize, usize), padding: (usize, usize), output_padding: (usize, usize), dilation: (usize, usize), groups: usize, ) -> Result<Tensor<T>, TorshError>

Transposed (deconvolution) 2D convolution operation

Source

pub fn xcorr1d( &self, other: &Tensor<T>, mode: CorrelationMode, ) -> Result<Tensor<T>, TorshError>

1D cross-correlation operation. Computes the cross-correlation between two 1D signals.

Source

pub fn autocorr1d( &self, max_lag: Option<usize>, ) -> Result<Tensor<T>, TorshError>

1D auto-correlation operation. Computes the auto-correlation of a 1D signal.

Source

pub fn xcorr2d( &self, other: &Tensor<T>, mode: CorrelationMode, ) -> Result<Tensor<T>, TorshError>

2D cross-correlation operation. Computes the 2D cross-correlation between two signals.

Source

pub fn median_filter1d( &self, window_size: usize, ) -> Result<Tensor<T>, TorshError>

1D median filter. Applies a median filter with the specified window size.

Source

pub fn median_filter2d( &self, window_size: (usize, usize), ) -> Result<Tensor<T>, TorshError>

2D median filter. Applies a 2D median filter with the specified window size.

Source

pub fn gaussian_filter1d( &self, sigma: f32, kernel_size: Option<usize>, ) -> Result<Tensor<T>, TorshError>

1D Gaussian filter. Applies a Gaussian filter with the specified sigma (standard deviation).

Source

pub fn gaussian_filter2d( &self, sigma: (f32, f32), kernel_size: Option<(usize, usize)>, ) -> Result<Tensor<T>, TorshError>

2D Gaussian filter. Applies a 2D Gaussian filter with the specified sigma values.

Source§

impl<T> Tensor<T>
where T: TensorElement,

Indexing implementation

Source

pub fn index(&self, indices: &[TensorIndex]) -> Result<Tensor<T>, TorshError>

Index into the tensor

Source

pub fn get_1d(&self, index: usize) -> Result<T, TorshError>

Get a single element (1D indexing)

Source

pub fn get_2d(&self, row: usize, col: usize) -> Result<T, TorshError>

Get a single element (2D indexing)

Source

pub fn get_3d(&self, x: usize, y: usize, z: usize) -> Result<T, TorshError>

Get a single element (3D indexing)

Source

pub fn set_1d(&mut self, index: usize, value: T) -> Result<(), TorshError>

Set a single element (1D indexing)

Source

pub fn set_2d( &mut self, row: usize, col: usize, value: T, ) -> Result<(), TorshError>

Set a single element (2D indexing)

Source

pub fn set_3d( &mut self, x: usize, y: usize, z: usize, value: T, ) -> Result<(), TorshError>

Set a single element (3D indexing)

Source

pub fn select(&self, dim: i32, index: i64) -> Result<Tensor<T>, TorshError>

Select along a dimension

Source

pub fn slice_with_step( &self, dim: i32, start: Option<i64>, end: Option<i64>, step: Option<i64>, ) -> Result<Tensor<T>, TorshError>

Slice along a dimension with PyTorch-style parameters

Source

pub fn narrow( &self, dim: i32, start: i64, length: usize, ) -> Result<Tensor<T>, TorshError>

Narrow along a dimension

Source

pub fn masked_select( &self, mask: &Tensor<bool>, ) -> Result<Tensor<T>, TorshError>

Boolean indexing (masking)

Source

pub fn take(&self, indices: &Tensor<i64>) -> Result<Tensor<T>, TorshError>

Take elements at the given indices, treating the tensor as flattened (presumably mirrors torch.take — confirm against the implementation)

Source

pub fn put( &self, indices: &Tensor<i64>, values: &Tensor<T>, ) -> Result<Tensor<T>, TorshError>

Put values at indices

Source

pub fn index_select( &self, dim: i32, index: &Tensor<i64>, ) -> Result<Tensor<T>, TorshError>

Select indices along a dimension

Source§

impl<T> Tensor<T>
where T: TensorElement,

Convenient indexing syntax

Source

pub fn index_with_list( &self, dim: i32, indices: &[i64], ) -> Result<Tensor<T>, TorshError>

Advanced indexing with list of indices (fancy indexing)

Source

pub fn index_with_mask( &self, dim: i32, mask: &Tensor<bool>, ) -> Result<Tensor<T>, TorshError>

Boolean mask indexing for a specific dimension

Source

pub fn mask_select(&self, mask: &Tensor<bool>) -> Result<Tensor<T>, TorshError>

Global boolean mask indexing (flattens to 1D result)

Source

pub fn where_condition<F>( &self, condition: F, ) -> Result<Tensor<bool>, TorshError>
where F: Fn(&T) -> bool, T: Clone,

Create boolean mask from condition

Source

pub fn scatter_indexed( &self, dim: i32, index: &Tensor<i64>, src: &Tensor<T>, ) -> Result<Tensor<T>, TorshError>

Scatter values along an axis using indices (indexing version)

Source§

impl<T> Tensor<T>
where T: TensorElement,

✅ Enhanced Tensor creation interface with SciRS2 memory optimization

Source

pub fn create_efficient( shape: &[usize], device: DeviceType, ) -> Result<Tensor<T>, TorshError>
where T: Clone + Default,

Create memory-efficient tensor with automatic strategy selection

Source

pub fn lazy( shape: &[usize], device: DeviceType, ) -> Result<Tensor<T>, TorshError>
where T: Clone + Default,

Create lazy tensor that defers allocation until first access

Source

pub fn memory_mapped( shape: &[usize], device: DeviceType, ) -> Result<Tensor<T>, TorshError>
where T: Clone + Default,

✅ SciRS2 Memory-Mapped Tensor for very large datasets. (Note: a zero-copy view of an existing tensor is disabled here due to a conflict with shape_ops.)

Source

pub fn chunked( shape: &[usize], chunk_size: usize, device: DeviceType, ) -> Result<Tensor<T>, TorshError>
where T: Clone + Default,

✅ SciRS2 Chunked Tensor for cache-efficient large data processing

Creates a tensor optimized for chunk-wise processing with the specified chunk size. This is useful for large tensors that benefit from cache-friendly access patterns.

§Arguments
  • shape - The shape of the tensor
  • chunk_size - Preferred chunk size for processing (in elements)
  • device - Device to allocate the tensor on
Source

pub fn disk_backed( shape: &[usize], device: DeviceType, file_path: Option<&str>, ) -> Result<Tensor<T>, TorshError>
where T: Clone + Default,

✅ SciRS2 Disk-Backed Tensor for datasets larger than RAM

Creates a tensor that can be backed by disk storage for large datasets. This is useful when working with datasets larger than available RAM.

§Arguments
  • shape - The shape of the tensor
  • device - Device to allocate the tensor on
  • file_path - Optional file path for persistent storage. If None, uses temporary file.
§Note

Current implementation creates an in-memory tensor. Full memory-mapped file support requires the mmap-support feature and will be used automatically when available.

Source

pub fn process_chunked<F, R>( &self, chunk_size: usize, processor: F, ) -> Result<Vec<R>, TorshError>
where F: FnMut(&[T]) -> Result<R, TorshError>, T: Clone,

Process tensor in memory-efficient chunks

Source§

impl<T> Tensor<T>
where T: TensorElement + Copy + Default,

Convenient functions for creating pooled tensors

Source

pub fn pooled( shape: &[usize], device: DeviceType, ) -> Result<PooledTensor<T>, TorshError>

Create a tensor using the memory pool

Source

pub fn temporary( shape: &[usize], device: DeviceType, ) -> Result<PooledTensor<T>, TorshError>

Create temporary tensor for intermediate calculations

Source§

impl<T> Tensor<T>

NaN/Inf detection utilities for tensors

Source

pub fn has_nan_inf(&self) -> bool

Quick check if tensor contains any NaN or infinite values (optimized fast path)

This is the fastest check - it returns true if any issues are found, false if the tensor is clean. No detailed information is provided.

§Examples
let clean = Tensor::from_data(vec![1.0, 2.0, 3.0], vec![3], DeviceType::Cpu).expect("tensor creation should succeed");
assert!(!clean.has_nan_inf());

let dirty = Tensor::from_data(vec![1.0, f32::NAN, 3.0], vec![3], DeviceType::Cpu).expect("tensor creation should succeed");
assert!(dirty.has_nan_inf());
Source

pub fn has_nan(&self) -> bool

Check for NaN values only

Source

pub fn has_inf(&self) -> bool

Check for infinite values only

Source

pub fn count_nan_inf(&self) -> NanInfStats

Count NaN and infinite values

Source

pub fn check_nan_inf_with_config(&self, config: &NanInfConfig) -> NanInfReport

Comprehensive NaN/Inf detection with detailed reporting

§Examples
let tensor = Tensor::from_data(
    vec![1.0, f32::NAN, f32::INFINITY, -f32::INFINITY],
    vec![4],
    DeviceType::Cpu
).expect("tensor creation should succeed");

let config = NanInfConfig::detailed();
let report = tensor.check_nan_inf_with_config(&config);

assert_eq!(report.stats.nan_count, 1);
assert_eq!(report.stats.pos_inf_count, 1);
assert_eq!(report.stats.neg_inf_count, 1);
assert_eq!(report.locations.len(), 3);
Source

pub fn assert_finite(&self)

Assert that tensor contains no NaN or infinite values

§Panics

Panics if any NaN or infinite values are found

§Examples
let tensor = Tensor::from_data(vec![1.0, 2.0, 3.0], vec![3], DeviceType::Cpu).expect("tensor creation should succeed");
tensor.assert_finite(); // OK

// This would panic:
// let bad = Tensor::from_data(vec![1.0, f32::NAN], vec![2], DeviceType::Cpu).expect("tensor creation should succeed");
// bad.assert_finite(); // Panics!
Source

pub fn replace_nan_inf( &self, nan_replacement: T, pos_inf_replacement: T, neg_inf_replacement: T, ) -> Result<Tensor<T>, TorshError>

Replace NaN and infinite values with specified replacements

§Examples
let mut tensor = Tensor::from_data(
    vec![1.0, f32::NAN, f32::INFINITY, -f32::INFINITY],
    vec![4],
    DeviceType::Cpu
).expect("tensor creation should succeed");

let cleaned = tensor.replace_nan_inf(0.0, 1e6, -1e6).expect("replace_nan_inf should succeed");
assert!(!cleaned.has_nan_inf());
Source

pub fn nan_inf_mask(&self) -> Result<Tensor<bool>, TorshError>

Create a boolean mask indicating locations of NaN/Inf values

§Examples
let tensor = Tensor::from_data(
    vec![1.0, f32::NAN, 3.0, f32::INFINITY],
    vec![4],
    DeviceType::Cpu
).expect("tensor creation should succeed");

let mask = tensor.nan_inf_mask().expect("nan_inf_mask should succeed");
let mask_data = mask.to_vec().expect("to_vec conversion should succeed");
assert_eq!(mask_data, vec![false, true, false, true]);
Source§

impl<T> Tensor<T>
where T: TensorElement + Into<f64> + From<f64>,

FFT operations for tensors

Source

pub fn fft(&self) -> Result<Tensor<Complex<f64>>, TorshError>

Compute 1D FFT along the last dimension

Source

pub fn fft_with_plan( &self, plan: Option<&FFTPlan>, ) -> Result<Tensor<Complex<f64>>, TorshError>

Compute 1D FFT with a precomputed plan

Source

pub fn ifft(&self) -> Result<Tensor<T>, TorshError>
where T: TensorElement + From<f64>,

Compute 1D inverse FFT along the last dimension

Source

pub fn fft2(&self) -> Result<Tensor<Complex<f64>>, TorshError>

Compute 2D FFT on the last two dimensions

Source

pub fn ifft2(&self) -> Result<Tensor<T>, TorshError>
where T: TensorElement + From<f64>,

Compute 2D inverse FFT on the last two dimensions

Source

pub fn fft_along_dim_real( &self, dim: usize, ) -> Result<Tensor<Complex<f64>>, TorshError>

Compute FFT along a specific dimension for real tensors

Source

pub fn rfft(&self) -> Result<Tensor<Complex<f64>>, TorshError>

Real-to-complex FFT (more efficient for real inputs)

Source

pub fn irfft(&self, output_size: Option<usize>) -> Result<Tensor<T>, TorshError>
where T: TensorElement + From<f64>,

Complex-to-real inverse FFT

Source

pub fn power_spectrum(&self) -> Result<Tensor<T>, TorshError>
where T: TensorElement + From<f64>,

Compute power spectral density

Source

pub fn magnitude_spectrum(&self) -> Result<Tensor<T>, TorshError>
where T: TensorElement + From<f64>,

Compute magnitude spectrum

Source

pub fn phase_spectrum(&self) -> Result<Tensor<T>, TorshError>
where T: TensorElement + From<f64>,

Compute phase spectrum

Source§

impl<T> Tensor<T>
where T: TensorElement,

This impl block contains no public items.

General tensor operations that don’t require Into

Source§

impl Tensor<Complex<f64>>

Operations specific to complex tensors

Source

pub fn from_complex_data( data: Vec<Complex<f64>>, shape: Vec<usize>, device: DeviceType, ) -> Result<Tensor<Complex<f64>>, TorshError>

Create tensor from complex data

Source

pub fn to_real<T>(&self) -> Result<Tensor<T>, TorshError>
where T: TensorElement + From<f64>,

Convert complex tensor to real by taking the real part

Source

pub fn power_spectrum_from_fft<T>(&self) -> Result<Tensor<T>, TorshError>
where T: TensorElement + From<f64>,

Compute power spectrum from FFT result

Source

pub fn magnitude_spectrum_from_fft<T>(&self) -> Result<Tensor<T>, TorshError>
where T: TensorElement + From<f64>,

Compute magnitude spectrum from FFT result

Source

pub fn phase_spectrum_from_fft<T>(&self) -> Result<Tensor<T>, TorshError>
where T: TensorElement + From<f64>,

Compute phase spectrum from FFT result

Source

pub fn fft_complex(&self) -> Result<Tensor<Complex<f64>>, TorshError>

Compute FFT for complex data

Source

pub fn ifft_complex(&self) -> Result<Tensor<Complex<f64>>, TorshError>

Compute inverse FFT for complex data

Source

pub fn ifft2_complex(&self) -> Result<Tensor<Complex<f64>>, TorshError>

Compute 2D inverse FFT for complex data

Source

pub fn ifft_along_dim( &self, dim: usize, ) -> Result<Tensor<Complex<f64>>, TorshError>

Compute inverse FFT along a specific dimension

Source

pub fn fft2_complex(&self) -> Result<Tensor<Complex<f64>>, TorshError>

2D FFT for complex tensors

Source

pub fn fft_along_dim( &self, dim: usize, ) -> Result<Tensor<Complex<f64>>, TorshError>

Compute FFT along a specific dimension for complex tensors

Source

pub fn fft_along_dim_complex( &self, dim: usize, ) -> Result<Tensor<Complex<f64>>, TorshError>

Internal implementation of FFT along dimension for complex tensors

Source

pub fn slice_last_dim_complex( &self, start: usize, size: usize, ) -> Result<Tensor<Complex<f64>>, TorshError>

Slice along the last dimension for complex tensors

Source§

impl<T> Tensor<T>
where T: Default + Add<Output = T> + AddAssign + Sub<Output = T> + TensorElement + Mul<Output = T> + FloatElement<Output = T> + MulAssign + Div + Copy + PartialOrd + FromPrimitive + Sum,

Statistical operations for tensors

Source

pub fn mean_stats( &self, dims: Option<&[usize]>, keepdim: bool, ) -> Result<Tensor<T>, TorshError>

Compute mean along specified dimensions (legacy stats implementation)

Source

pub fn var( &self, dims: Option<&[usize]>, keepdim: bool, mode: StatMode, ) -> Result<Tensor<T>, TorshError>

Compute variance along specified dimensions

Source

pub fn std( &self, dims: Option<&[usize]>, keepdim: bool, mode: StatMode, ) -> Result<Tensor<T>, TorshError>

Compute standard deviation along specified dimensions

Source

pub fn percentile( &self, q: f64, dim: Option<usize>, _keepdim: bool, ) -> Result<Tensor<T>, TorshError>

Compute percentile along the last dimension

Source

pub fn median( &self, dim: Option<usize>, keepdim: bool, ) -> Result<Tensor<T>, TorshError>

Compute median (50th percentile)

Source

pub fn quantile( &self, q: &[f64], dim: Option<usize>, keepdim: bool, ) -> Result<Vec<Tensor<T>>, TorshError>

Compute quantiles at specified levels

Source

pub fn histogram( &self, config: &HistogramConfig, ) -> Result<Histogram, TorshError>

Create histogram of tensor values

Source

pub fn correlation( &self, other: &Tensor<T>, method: CorrelationMethod, ) -> Result<T, TorshError>

Compute correlation coefficient with another tensor

Source

pub fn describe(&self) -> Result<StatSummary, TorshError>

Generate comprehensive statistical summary

Source

pub fn cov(&self, mode: StatMode) -> Result<Tensor<T>, TorshError>

Compute covariance matrix for 2D tensor (each column is a variable)

Source

pub fn corrcoef(&self) -> Result<Tensor<T>, TorshError>

Compute correlation matrix for 2D tensor

Source§

impl<T> Tensor<T>
where T: TensorElement + Copy,

Source

pub fn calculate_strides(&self) -> Vec<usize>

Calculate strides for current tensor shape

Source

pub fn create_view( &self, new_shape: &[usize], ) -> Result<TensorView<T>, TorshError>

Create a view of this tensor with a new shape (must have same number of elements)

Source

pub fn view_with_strides( &self, new_shape: &[usize], strides: &[usize], ) -> Result<TensorView<T>, TorshError>

Create a view with custom strides (advanced usage)

Source

pub fn slice( &self, dim: usize, start: usize, end: usize, ) -> Result<TensorView<T>, TorshError>

Create a slice view of the tensor along a specific dimension

Source

pub fn alias(&self) -> TensorAlias<T>

Create an alias (shared reference) to this tensor

Source

pub fn alias_mut(&mut self) -> TensorAlias<T>

Create a mutable alias to this tensor

Source§

impl Tensor

f32 tensor conversions with SIMD optimization

Source

pub fn to_f64_simd(&self) -> Result<Tensor<f64>, TorshError>

Convert to f64 tensor with SIMD optimization

Source

pub fn to_i32_simd(&self) -> Result<Tensor<i32>, TorshError>

Convert to i32 tensor with SIMD optimization (with bounds checking)

Source§

impl Tensor<i32>

i32 tensor conversions with SIMD optimization

Source

pub fn to_f32_simd(&self) -> Result<Tensor, TorshError>

Convert to f32 tensor with SIMD optimization

Source

pub fn to_f64_simd(&self) -> Result<Tensor<f64>, TorshError>

Convert to f64 tensor with SIMD optimization

Source

pub fn to_i64_simd(&self) -> Result<Tensor<i64>, TorshError>

Convert to i64 tensor with SIMD optimization

Source§

impl Tensor<i64>

i64 tensor conversions with SIMD optimization

Source

pub fn to_f32_simd(&self) -> Result<Tensor, TorshError>

Convert to f32 tensor with SIMD optimization

Source

pub fn to_f64_simd(&self) -> Result<Tensor<f64>, TorshError>

Convert to f64 tensor with SIMD optimization

Source

pub fn to_i32_simd(&self) -> Result<Tensor<i32>, TorshError>

Convert to i32 tensor with SIMD optimization (with bounds checking)

Source§

impl Tensor<f64>

f64 tensor conversions with SIMD optimization

Source

pub fn to_f32_simd(&self) -> Result<Tensor, TorshError>

Convert to f32 tensor with SIMD optimization

Source§

impl<T> Tensor<T>
where T: TensorElement + Copy,

Generic SIMD conversion implementations

Source

pub fn convert_with_optimal_simd<U>(&self) -> Result<Tensor<U>, TorshError>
where U: TensorElement + Copy + From<T>,

Convert tensor using optimal SIMD strategy for this system

Source

pub fn convert_with_strategy<U>( &self, strategy: SIMDStrategy, ) -> Result<Tensor<U>, TorshError>
where U: TensorElement + Copy + From<T>,

Convert tensor using specific SIMD strategy

Source§

impl<T> Tensor<T>
where T: TensorElement,

Source

pub fn from_vec(data: Vec<T>, shape: &[usize]) -> Result<Tensor<T>, TorshError>
where T: Copy,

Create from vec with shape (convenience method)

Trait Implementations§

Source§

impl<T> Add for &Tensor<T>
where T: TensorElement + Copy + Add<Output = T>,

Source§

type Output = Tensor<T>

The resulting type after applying the + operator.
Source§

fn add(self, rhs: &Tensor<T>) -> <&Tensor<T> as Add>::Output

Performs the + operation. Read more
Source§

impl BFloat16TensorOps<bf16> for Tensor<bf16>

Source§

fn to_bf16_with_rounding( &self, _mode: BF16RoundingMode, ) -> Result<Tensor<bf16>, TorshError>

Convert tensor to bf16 with specified rounding mode
Source§

fn to_f32(&self) -> Result<Tensor, TorshError>

Convert from bf16 tensor to higher precision
Source§

fn bf16_high_precision_op<F>(&self, op: F) -> Result<Tensor<bf16>, TorshError>
where F: Fn(&Tensor) -> Result<Tensor, TorshError>,

Perform operation in higher precision then round back to bf16
Source§

impl BFloat16TensorOps<f32> for Tensor

Source§

fn to_bf16_with_rounding( &self, mode: BF16RoundingMode, ) -> Result<Tensor<bf16>, TorshError>

Convert tensor to bf16 with specified rounding mode
Source§

fn to_f32(&self) -> Result<Tensor, TorshError>

Convert from bf16 tensor to higher precision
Source§

fn bf16_high_precision_op<F>(&self, op: F) -> Result<Tensor<bf16>, TorshError>
where F: Fn(&Tensor) -> Result<Tensor, TorshError>,

Perform operation in higher precision then round back to bf16
Source§

impl<T> Clone for Tensor<T>
where T: Clone + TensorElement,

Source§

fn clone(&self) -> Tensor<T>

Returns a duplicate of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl<T> Debug for Tensor<T>
where T: TensorElement,

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

Formats the value using the given formatter. Read more
Source§

impl<T> Div for &Tensor<T>
where T: TensorElement + Copy + Div<Output = T>,

Source§

type Output = Tensor<T>

The resulting type after applying the / operator.
Source§

fn div(self, rhs: &Tensor<T>) -> <&Tensor<T> as Div>::Output

Performs the / operation. Read more
Source§

impl<T> Mul for &Tensor<T>
where T: TensorElement + Copy + Mul<Output = T>,

Source§

type Output = Tensor<T>

The resulting type after applying the * operator.
Source§

fn mul(self, rhs: &Tensor<T>) -> <&Tensor<T> as Mul>::Output

Performs the * operation. Read more
Source§

impl<T> Neg for &Tensor<T>
where T: TensorElement + Copy + Neg<Output = T>,

Source§

type Output = Tensor<T>

The resulting type after applying the - operator.
Source§

fn neg(self) -> <&Tensor<T> as Neg>::Output

Performs the unary - operation. Read more
Source§

impl<T> Sub for &Tensor<T>
where T: TensorElement + Copy + Sub<Output = T>,

Source§

type Output = Tensor<T>

The resulting type after applying the - operator.
Source§

fn sub(self, rhs: &Tensor<T>) -> <&Tensor<T> as Sub>::Output

Performs the - operation. Read more
Source§

impl<T> TensorConvenience<T> for Tensor<T>

Source§

fn T(&self) -> Result<Tensor<T>, TorshError>

Transpose shortcut (equivalent to .transpose()) Read more
Source§

fn mT(&self) -> Result<Tensor<T>, TorshError>

Matrix transpose (alias for .T())
Source§

fn H(&self) -> Result<Tensor<T>, TorshError>

Hermitian transpose (conjugate transpose for complex numbers)
Source§

fn t(&self) -> Result<Tensor<T>, TorshError>

Transpose shortcut (snake_case version)
Source§

fn m_t(&self) -> Result<Tensor<T>, TorshError>

Matrix transpose (snake_case version)
Source§

fn h(&self) -> Result<Tensor<T>, TorshError>

Hermitian transpose (snake_case version)
Source§

fn detach(&self) -> Tensor<T>

Detach tensor from computational graph (creates a new tensor without gradients)
Source§

fn clone_tensor(&self) -> Result<Tensor<T>, TorshError>

Clone tensor data (creates a deep copy)
Source§

fn is_contiguous(&self) -> bool

Check if tensor is contiguous in memory
Source§

fn contiguous(&self) -> Result<Tensor<T>, TorshError>

Make tensor contiguous (reorganize memory layout)
Source§

fn numel(&self) -> usize

Get number of elements in tensor
Source§

fn size(&self) -> Vec<usize>

Get tensor size (alias for shape().dims())
Source§

fn is_empty(&self) -> bool

Check if tensor is empty (has zero elements)
Source§

fn is_scalar(&self) -> bool

Check if tensor is scalar (zero dimensions)
Source§

fn item(&self) -> T

Get tensor item as scalar (only works for scalar tensors)
Source§

fn to_scalar(&self) -> Result<T, TorshError>

Convert tensor to scalar (squeezes all dimensions of size 1 first)
Source§

impl<T> TensorCustomOps<T> for Tensor<T>
where T: TensorElement + 'static,

Source§

fn apply_custom_op( &self, op_name: &str, other_inputs: &[&Tensor<T>], params: &OperationParams, ) -> Result<Vec<Tensor<T>>, TorshError>

Apply a custom operation to this tensor Read more
Source§

fn apply_custom_op_with_registry( &self, registry: &CustomOperationRegistry, op_name: &str, other_inputs: &[&Tensor<T>], params: &OperationParams, ) -> Result<Vec<Tensor<T>>, TorshError>

Apply a custom operation using a specific registry
Source§

impl<T> TensorExprExt<T> for Tensor<T>
where T: TensorElement + Copy,

Source§

fn expr(&self) -> Result<ExprBuilder<T, TensorExpr<'_, T>>, TorshError>

Convert the tensor to an expression builder
Source§

impl<T> TensorExpressionOps<T> for Tensor<T>
where T: TensorElement,

Source§

fn build_expression_graph(&self) -> ExpressionGraph

Build an expression graph from tensor operations
Source§

fn optimize_expressions( &self, config: OptimizerConfig, ) -> Result<OptimizationStats, TorshError>

Optimize tensor expressions using the expression optimizer
Source§

impl<T> TensorFluentExt<T> for Tensor<T>
where T: TensorElement,

Source§

fn fluent(self) -> FluentTensor<T>

Start fluent chaining
Source§

impl<T> TensorManipulationExt<T> for Tensor<T>
where T: TensorElement + Copy,

Source§

fn squeeze_all(&self) -> Result<Tensor<T>, TorshError>

Squeeze all dimensions of size 1
Source§

fn squeeze_dims(&self, dims: &[i32]) -> Result<Tensor<T>, TorshError>

Squeeze specific dimensions
Source§

fn unsqueeze_dims(&self, dims: &[i32]) -> Result<Tensor<T>, TorshError>

Unsqueeze at multiple positions
Source§

fn add_batch_dim(&self) -> Result<Tensor<T>, TorshError>

Add a batch dimension at the front
Source§

fn remove_batch_dim(&self) -> Result<Tensor<T>, TorshError>

Remove the batch dimension (first dimension)
Source§

fn atleast_nd(&self, n: usize) -> Result<Tensor<T>, TorshError>

Ensure tensor has at least N dimensions (add trailing dimensions)
Source§

fn to_channel_last(&self) -> Result<Tensor<T>, TorshError>

Transpose to channel-last format (NCHW -> NHWC)
Source§

fn to_channel_first(&self) -> Result<Tensor<T>, TorshError>

Transpose to channel-first format (NHWC -> NCHW)
Source§

fn swap_dims(&self, dim0: i32, dim1: i32) -> Result<Tensor<T>, TorshError>

Swap two dimensions
Source§

fn move_dim(&self, src: i32, dst: i32) -> Result<Tensor<T>, TorshError>

Move a dimension to a new position
Source§

fn expand_to(&self, target_shape: &[usize]) -> Result<Tensor<T>, TorshError>

Expand singleton dimensions to match target shape
Source§

fn repeat_along( &self, dim: i32, repeats: usize, ) -> Result<Tensor<T>, TorshError>

Repeat tensor along new dimension
Source§

impl<T> TensorShapeConvenience<T> for Tensor<T>
where T: TensorElement + Copy,

Source§

fn unsqueeze_at(&self, dim: i32) -> Result<Tensor<T>, TorshError>

Add singleton dimension at specified position
Source§

fn squeeze_all(&self) -> Result<Tensor<T>, TorshError>

Remove all singleton dimensions
Source§

fn flatten(&self) -> Result<Tensor<T>, TorshError>

Flatten tensor to 1D (preserving total number of elements)
Source§

fn flatten_from(&self, start_dim: i32) -> Result<Tensor<T>, TorshError>

Flatten tensor starting from specified dimension
Source§

fn unflatten(&self, dim: i32, sizes: &[usize]) -> Result<Tensor<T>, TorshError>

Unflatten tensor back to specified shape

Auto Trait Implementations§

§

impl<T = f32> !Freeze for Tensor<T>

§

impl<T> RefUnwindSafe for Tensor<T>
where T: RefUnwindSafe,

§

impl<T> Send for Tensor<T>

§

impl<T> Sync for Tensor<T>

§

impl<T> Unpin for Tensor<T>

§

impl<T> UnsafeUnpin for Tensor<T>

§

impl<T> UnwindSafe for Tensor<T>
where T: RefUnwindSafe,

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<T> Pointable for T

Source§

const ALIGN: usize

The alignment of pointer.
Source§

type Init = T

The type for initializers.
Source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a pointable value with the given initializer. Read more
Source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
Source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
Source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V