numr 0.5.1

High-performance numerical computing with multi-backend GPU acceleration (CPU/CUDA/WebGPU)
Documentation
//! Tensor decomposition algorithms for higher-order tensors
//!
//! Provides algorithms for decomposing N-dimensional tensors into
//! structured forms (Tucker, CP/PARAFAC, Tensor-Train).

use super::super::decompositions::*;
use crate::error::{Error, Result};
use crate::runtime::Runtime;
use crate::tensor::Tensor;

/// Tensor decomposition algorithms for higher-order tensors (N-dimensional arrays)
///
/// This trait provides algorithms for decomposing N-dimensional tensors into
/// structured forms. Unlike matrix decompositions which operate on 2D arrays,
/// tensor decompositions handle arbitrary-dimensional data.
///
/// # Core Operations
///
/// The trait provides fundamental tensor operations that are building blocks
/// for decomposition algorithms:
///
/// - **Mode-n Unfolding (Matricization)**: Convert N-D tensor to 2D matrix
/// - **Mode-n Folding**: Inverse of unfolding, reconstruct tensor from matrix
/// - **Mode-n Product**: Multiply tensor by matrix along a specific mode
///
/// # Decomposition Algorithms
///
/// - **Tucker**: T ≈ G ×₁ A₁ ×₂ A₂ ... (core tensor + factor matrices)
/// - **HOSVD**: Higher-Order SVD (Tucker with orthogonal factors via SVD)
/// - **CP/PARAFAC**: T ≈ Σᵣ λᵣ (a₁ʳ ⊗ a₂ʳ ⊗ ...) (sum of rank-1 tensors)
/// - **Tensor-Train**: T = G₁ × G₂ × ... × Gₙ (sequence of 3D cores)
///
/// # Implementation Requirements
///
/// All backends must implement these algorithms identically to ensure
/// numerical parity. The algorithms use existing operations (SVD, matmul,
/// reshape, permute) from the runtime.
///
/// # Use Cases
///
/// - **Data Compression**: Reduce storage for multi-dimensional arrays
/// - **Dimensionality Reduction**: Extract principal components from tensor data
/// - **Latent Factor Models**: Discover hidden structure in multi-way data
/// - **Quantum Systems**: Tensor network representations
/// - **Recommender Systems**: User-item-context factorization
pub trait TensorDecomposeAlgorithms<R: Runtime> {
    /// Mode-n unfolding (matricization) of a tensor
    ///
    /// Unfolds an N-dimensional tensor into a 2D matrix by arranging mode-n
    /// fibers as columns of the resulting matrix.
    ///
    /// # Mathematical Definition
    ///
    /// For a tensor T of shape `[I₁, I₂, ..., Iₙ]`, the mode-n unfolding `T₍ₙ₎`
    /// is a matrix of shape `[Iₙ, ∏ⱼ≠ₙ Iⱼ]` where:
    ///
    /// ```text
    /// T₍ₙ₎[iₙ, j] = T[i₁, i₂, ..., iₙ, ..., iₙ]
    /// ```
    ///
    /// where `j` is computed from indices `(i₁, ..., iₙ₋₁, iₙ₊₁, ..., iₙ)`
    /// using a specific ordering convention.
    ///
    /// # Convention
    ///
    /// Uses the standard convention where modes are ordered as:
    /// n, n+1, ..., N, 1, 2, ..., n-1 (forward cyclic from mode n)
    ///
    /// # Arguments
    ///
    /// * `tensor` - Input tensor of arbitrary dimension (≥ 2)
    /// * `mode` - Mode along which to unfold (0-indexed, must be < ndim)
    ///
    /// # Returns
    ///
    /// Matrix of shape `[I_mode, ∏ⱼ≠mode Iⱼ]`
    ///
    /// # Example
    ///
    /// ```ignore
    /// // Tensor of shape `[2, 3, 4]`
    /// let unfolded = client.unfold(&tensor, 1)?;
    /// // Result has shape `[3, 8]` (mode-1 fibers as rows)
    /// ```
    fn unfold(&self, tensor: &Tensor<R>, mode: usize) -> Result<Tensor<R>> {
        let _ = (tensor, mode);
        Err(Error::NotImplemented {
            feature: "TensorDecomposeAlgorithms::unfold",
        })
    }

    /// Mode-n folding (tensorization) - inverse of unfolding
    ///
    /// Reconstructs an N-dimensional tensor from its mode-n unfolding.
    ///
    /// # Arguments
    ///
    /// * `matrix` - The mode-n unfolded matrix [I_mode, ∏ⱼ≠mode Iⱼ]
    /// * `mode` - Mode that was unfolded (0-indexed)
    /// * `shape` - Original tensor shape [I₁, I₂, ..., Iₙ]
    ///
    /// # Returns
    ///
    /// Tensor of the specified shape
    ///
    /// # Panics
    ///
    /// If matrix dimensions don't match the expected unfolded size for the given shape.
    fn fold(&self, matrix: &Tensor<R>, mode: usize, shape: &[usize]) -> Result<Tensor<R>> {
        let _ = (matrix, mode, shape);
        Err(Error::NotImplemented {
            feature: "TensorDecomposeAlgorithms::fold",
        })
    }

    /// Mode-n product: tensor × matrix along mode n
    ///
    /// Multiplies a tensor by a matrix along a specified mode. This is the
    /// fundamental operation used in Tucker decomposition reconstruction.
    ///
    /// # Mathematical Definition
    ///
    /// For tensor T of shape `[I₁, ..., Iₙ, ..., Iₙ]` and matrix M of shape `[J, Iₙ]`,
    /// the mode-n product `Y = T ×ₙ M` has shape `[I₁, ..., J, ..., Iₙ]` where:
    ///
    /// ```text
    /// Y[i₁, ..., j, ..., iₙ] = Σₖ T[i₁, ..., k, ..., iₙ] × M[j, k]
    /// ```
    ///
    /// # Equivalent Operations
    ///
    /// ```text
    /// T ×ₙ M  ⟺  fold(M @ unfold(T, n), n, new_shape)
    /// ```
    ///
    /// # Properties
    ///
    /// - `(T ×ₘ A) ×ₙ B = (T ×ₙ B) ×ₘ A` when `m ≠ n` (modes commute)
    /// - `(T ×ₙ A) ×ₙ B = T ×ₙ (BA)` (same mode contracts)
    /// - `T ×ₙ I = T` (identity matrix leaves tensor unchanged)
    ///
    /// # Arguments
    ///
    /// * `tensor` - Input tensor of shape `[I₁, ..., Iₙ, ..., Iₙ]`
    /// * `matrix` - Matrix of shape `[J, Iₙ]` to multiply along mode `n`
    /// * `mode` - Mode along which to multiply (0-indexed)
    ///
    /// # Returns
    ///
    /// Tensor of shape `[I₁, ..., J, ..., Iₙ]` (mode `n` dimension changed from `Iₙ` to `J`)
    fn mode_n_product(
        &self,
        tensor: &Tensor<R>,
        matrix: &Tensor<R>,
        mode: usize,
    ) -> Result<Tensor<R>> {
        let _ = (tensor, matrix, mode);
        Err(Error::NotImplemented {
            feature: "TensorDecomposeAlgorithms::mode_n_product",
        })
    }

    /// Higher-Order SVD (HOSVD) decomposition
    ///
    /// Computes a Tucker decomposition where factor matrices are orthogonal,
    /// obtained by computing truncated SVD of each mode-n unfolding.
    ///
    /// # Algorithm
    ///
    /// 1. For each mode `n = 1, ..., N`:
    ///    - Compute mode-n unfolding: `T₍ₙ₎`
    ///    - Compute truncated SVD: `T₍ₙ₎ ≈ Uₙ @ Sₙ @ Vₙᵀ`
    ///    - Set factor matrix `Aₙ = first Rₙ columns of Uₙ`
    /// 2. Compute core: `G = T ×₁ A₁ᵀ ×₂ A₂ᵀ ... ×ₙ Aₙᵀ`
    ///
    /// # Properties
    ///
    /// - Factor matrices are orthogonal: `Aₙᵀ @ Aₙ = I`
    /// - Core tensor is "all-orthogonal": mode-n unfoldings have orthogonal rows
    /// - NOT the best `rank-(R₁, ..., Rₙ)` approximation (use Tucker ALS for that)
    /// - Fast: O(N × SVD cost) vs iterative methods
    ///
    /// # Arguments
    ///
    /// * `tensor` - Input tensor of arbitrary dimension
    /// * `ranks` - Multilinear ranks `[R₁, R₂, ..., Rₙ]`. Each `Rₖ ≤ Iₖ`.
    ///   Use 0 or dimension size to keep full rank for that mode.
    ///
    /// # Returns
    ///
    /// Tucker decomposition with orthogonal factor matrices
    fn hosvd(&self, tensor: &Tensor<R>, ranks: &[usize]) -> Result<TuckerDecomposition<R>> {
        let _ = (tensor, ranks);
        Err(Error::NotImplemented {
            feature: "TensorDecomposeAlgorithms::hosvd",
        })
    }

    /// Tucker decomposition via Higher-Order Orthogonal Iteration (HOOI)
    ///
    /// Iteratively refines a Tucker decomposition to minimize reconstruction error.
    /// More accurate than HOSVD but more expensive.
    ///
    /// # Algorithm
    ///
    /// 1. Initialize factors using HOSVD or random
    /// 2. Repeat until convergence:
    ///    - For each mode `n`:
    ///      - Compute: `Y = T ×₁ A₁ᵀ ... ×ₙ₋₁ Aₙ₋₁ᵀ ×ₙ₊₁ Aₙ₊₁ᵀ ... ×ₙ Aₙᵀ`
    ///      - Update `Aₙ = leading Rₙ left singular vectors of unfold(Y, n)`
    /// 3. Compute core: `G = T ×₁ A₁ᵀ ×₂ A₂ᵀ ... ×ₙ Aₙᵀ`
    ///
    /// # Convergence
    ///
    /// - Always converges (monotonically decreasing error)
    /// - May converge to local minimum
    /// - Typically converges in 5-20 iterations
    ///
    /// # Arguments
    ///
    /// * `tensor` - Input tensor
    /// * `ranks` - Multilinear ranks `[R₁, R₂, ..., Rₙ]`
    /// * `options` - Algorithm options (max_iter, tolerance, initialization)
    ///
    /// # Returns
    ///
    /// Tucker decomposition with approximately orthogonal factors
    fn tucker(
        &self,
        tensor: &Tensor<R>,
        ranks: &[usize],
        options: TuckerOptions,
    ) -> Result<TuckerDecomposition<R>> {
        let _ = (tensor, ranks, options);
        Err(Error::NotImplemented {
            feature: "TensorDecomposeAlgorithms::tucker",
        })
    }

    /// CP/PARAFAC decomposition via Alternating Least Squares (ALS)
    ///
    /// Decomposes a tensor as a sum of rank-one tensors using the ALS algorithm.
    ///
    /// # Algorithm
    ///
    /// 1. Initialize factor matrices randomly or via SVD
    /// 2. Repeat until convergence:
    ///    - For each mode `n`:
    ///      - Fix all factors except `Aₙ`
    ///      - Solve least squares for `Aₙ`:
    ///        `Aₙ = T₍ₙ₎ @ (⊙ⱼ≠ₙ Aⱼ) @ (⊛ⱼ≠ₙ AⱼᵀAⱼ)⁻¹`
    ///    - Optionally normalize factors and update weights
    /// 3. Return factor matrices and weights
    ///
    /// # Uniqueness
    ///
    /// CP decomposition is essentially unique (up to permutation and scaling)
    /// under Kruskal's condition:
    ///
    /// ```text
    /// krank(A₁) + krank(A₂) + ... + krank(Aₙ) ≥ 2R + (N - 1)
    /// ```
    ///
    /// where `krank` is the Kruskal rank.
    ///
    /// # Arguments
    ///
    /// * `tensor` - Input tensor
    /// * `rank` - CP rank (number of rank-1 components)
    /// * `options` - Algorithm options (max_iter, tolerance, initialization)
    ///
    /// # Returns
    ///
    /// CP decomposition with factor matrices and weights
    fn cp_decompose(
        &self,
        tensor: &Tensor<R>,
        rank: usize,
        options: CpOptions,
    ) -> Result<CpDecomposition<R>> {
        let _ = (tensor, rank, options);
        Err(Error::NotImplemented {
            feature: "TensorDecomposeAlgorithms::cp_decompose",
        })
    }

    /// Tensor-Train (TT) decomposition via TT-SVD
    ///
    /// Decomposes a tensor into a train of 3D core tensors connected by
    /// contractions, using sequential SVD.
    ///
    /// # Algorithm (TT-SVD)
    ///
    /// 1. Reshape T to `[I₁, I₂ × ... × Iₙ]`
    /// 2. Compute truncated SVD: `T ≈ U @ S @ Vᵀ`, keep rank `R₁`
    /// 3. Set `G₁ = reshape(U, [1, I₁, R₁])`
    /// 4. Reshape `S @ Vᵀ` to `[R₁ × I₂, I₃ × ... × Iₙ]`
    /// 5. Repeat SVD for each mode
    /// 6. Last core `Gₙ` has shape `[Rₙ₋₁, Iₙ, 1]`
    ///
    /// # Rank Selection
    ///
    /// TT-ranks are determined by the `tolerance` parameter:
    /// - Keep singular values until cumulative truncation error < tolerance × ||T||
    /// - Or use `max_rank` to cap the maximum TT-rank
    ///
    /// # Quasi-Optimal
    ///
    /// TT-SVD is quasi-optimal: the error is at most `√(N-1)` times
    /// the best possible error for the given ranks.
    ///
    /// # Arguments
    ///
    /// * `tensor` - Input tensor
    /// * `max_rank` - Maximum allowed TT-rank (0 for no limit)
    /// * `tolerance` - Relative tolerance for rank truncation
    ///
    /// # Returns
    ///
    /// Tensor-Train decomposition with sequence of 3D cores
    fn tensor_train(
        &self,
        tensor: &Tensor<R>,
        max_rank: usize,
        tolerance: f64,
    ) -> Result<TensorTrainDecomposition<R>> {
        let _ = (tensor, max_rank, tolerance);
        Err(Error::NotImplemented {
            feature: "TensorDecomposeAlgorithms::tensor_train",
        })
    }

    /// Reconstruct tensor from Tucker decomposition
    ///
    /// Computes: `T = G ×₁ A₁ ×₂ A₂ ... ×ₙ Aₙ`
    ///
    /// # Arguments
    ///
    /// * `decomp` - Tucker decomposition (core + factor matrices)
    ///
    /// # Returns
    ///
    /// Full tensor of shape `[I₁, I₂, ..., Iₙ]`
    fn tucker_reconstruct(&self, decomp: &TuckerDecomposition<R>) -> Result<Tensor<R>> {
        let _ = decomp;
        Err(Error::NotImplemented {
            feature: "TensorDecomposeAlgorithms::tucker_reconstruct",
        })
    }

    /// Reconstruct tensor from CP decomposition
    ///
    /// Computes: `T = Σᵣ λᵣ × (a₁ʳ ⊗ a₂ʳ ⊗ ... ⊗ aₙʳ)`
    ///
    /// # Arguments
    ///
    /// * `decomp` - CP decomposition (factor matrices + weights)
    /// * `shape` - Output tensor shape `[I₁, I₂, ..., Iₙ]`
    ///
    /// # Returns
    ///
    /// Full tensor of the specified shape
    fn cp_reconstruct(&self, decomp: &CpDecomposition<R>, shape: &[usize]) -> Result<Tensor<R>> {
        let _ = (decomp, shape);
        Err(Error::NotImplemented {
            feature: "TensorDecomposeAlgorithms::cp_reconstruct",
        })
    }

    /// Reconstruct tensor from Tensor-Train decomposition
    ///
    /// Contracts all TT-cores to recover the full tensor.
    ///
    /// # Arguments
    ///
    /// * `decomp` - Tensor-Train decomposition (sequence of 3D cores)
    ///
    /// # Returns
    ///
    /// Full tensor of shape `[I₁, I₂, ..., Iₙ]`
    fn tt_reconstruct(&self, decomp: &TensorTrainDecomposition<R>) -> Result<Tensor<R>> {
        let _ = decomp;
        Err(Error::NotImplemented {
            feature: "TensorDecomposeAlgorithms::tt_reconstruct",
        })
    }
}