// elid 0.4.24 - Docs.rs

//! Morton (Z-order) curve encoding and decoding
//!
//! This module provides bit-interleaving functions to encode n-dimensional coordinates
//! into a single scalar value (Morton code) that preserves spatial locality.
//!
//! # Overview
//!
//! Morton encoding (also known as Z-order curve encoding) interleaves the bits of
//! multi-dimensional coordinates to create a one-dimensional ordering that preserves
//! locality. Points that are close in the original space tend to have similar Morton codes.
//!
//! # Algorithm
//!
//! **Encoding** (bit interleaving):
//!
//! ```text
//! for each bit position (0..bits_per_dim):
//!   for each dimension (0..coords.len()):
//!     extract the bit at that position from coords[dim]
//!     place it in the result at position: bit_idx * num_dims + dim_idx
//! ```
//!
//! **Decoding** (reverse bit interleaving):
//!
//! ```text
//! for each bit in code:
//!   determine which dimension it belongs to (bit_pos % num_dims)
//!   determine which bit position in that dimension (bit_pos / num_dims)
//!   set that bit in result[dim]
//! ```
//!
//! # Example
//!
//! ```rust
//! use elid::embeddings::morton::{morton_encode, morton_decode};
//!
//! // 2D coordinates with 3 bits per dimension
//! let coords = vec![5, 3]; // [0b101, 0b011]
//! let code = morton_encode(&coords, 3);
//! assert_eq!(code, 0b11011); // 27
//!
//! // Decode back
//! let decoded = morton_decode(code, 2, 3);
//! assert_eq!(decoded, coords);
//! ```
//!
//! # Performance
//!
//! - **Encoding**: O(n·b) where n=dimensions, b=bits per dimension (~100ns per encode)
//! - **Decoding**: O(n·b) (~100ns per decode)
//! - **Throughput**: ~10M operations/sec
//!
//! # References
//!
//! - **Morton, G.M. (1966)**: "A Computer Oriented Geodetic Data Base and a New Technique in File Sequencing"
//! - **Nordin, S. & Telles, G. (2023)**: "Comparison of Space-Filling Curves for High-Dimensional Indexing"

/// Encode n-dimensional coordinates into a Morton code using bit interleaving
///
/// The low `bits_per_dim` bits of every coordinate are interleaved into a single
/// `u128`, least-significant bits first and dimension 0 first within each group.
/// Coordinate bits at or above `bits_per_dim` are ignored.
///
/// # Algorithm
///
/// ```text
/// for each bit position (0..bits_per_dim):
///   for each dimension (0..coords.len()):
///     extract the bit at that position from coords[dim]
///     place it in the result at position: bit_idx * num_dims + dim_idx
/// ```
///
/// # Parameters
///
/// - `coords`: Slice of u16 coordinates (one per dimension)
/// - `bits_per_dim`: Number of bits to use from each coordinate (typically 10-13).
///   Values above 16 are accepted as long as the total still fits in 128 bits;
///   a `u16` has no bits beyond position 15, so those positions are encoded as zero.
///
/// # Returns
///
/// u128 Morton code with interleaved bits. An empty `coords` slice yields `0`.
///
/// # Panics
///
/// Panics if the resulting Morton code would exceed 128 bits:
/// - Maximum dimensions: 128 / bits_per_dim
/// - Example: 10 bits per dim allows 12 dimensions, 13 bits allows 9 dimensions
///
/// # Examples
///
/// ```rust
/// use elid::embeddings::morton::morton_encode;
///
/// // 2D example with 3 bits per dimension
/// let coords = vec![5, 3]; // [0b101, 0b011]
/// let code = morton_encode(&coords, 3);
/// assert_eq!(code, 0b11011); // 27 - bit interleaving: y2x2 y1x1 y0x0 = 011011
///
/// // 3D example
/// let coords = vec![1, 2, 3]; // [0b001, 0b010, 0b011]
/// let code = morton_encode(&coords, 2);
/// assert_eq!(code, 0b110101); // 53 - z1y1x1 z0y0x0 = 110101
/// ```
pub fn morton_encode(coords: &[u16], bits_per_dim: u8) -> u128 {
    let num_dims = coords.len();
    let total_bits = usize::from(bits_per_dim) * num_dims;

    assert!(
        total_bits <= 128,
        "Morton code exceeds 128 bits: {} dims × {} bits = {} bits",
        num_dims,
        bits_per_dim,
        total_bits
    );

    // A u16 only carries 16 bits. Shifting it by 16 or more would overflow
    // (panic in debug builds, wrapped shift amount in release builds), and the
    // bits above position 15 are zero anyway, so stop at the coordinate width.
    let usable_bits = usize::from(bits_per_dim).min(u16::BITS as usize);

    let mut result: u128 = 0;

    // Interleave: for each bit position, take that bit from every dimension.
    for bit_idx in 0..usable_bits {
        for (dim_idx, &coord) in coords.iter().enumerate() {
            let bit = u128::from((coord >> bit_idx) & 1);

            // pos < usable_bits * num_dims <= total_bits <= 128, so the shift is in range.
            let pos = bit_idx * num_dims + dim_idx;
            result |= bit << pos;
        }
    }

    result
}

/// Decode a Morton code back into n-dimensional coordinates
///
/// This function reverses the bit interleaving performed by [`morton_encode`],
/// extracting the original coordinates from a Morton code.
///
/// # Algorithm
///
/// ```text
/// for each bit in code (0..num_dims * bits_per_dim):
///   determine which dimension it belongs to (bit_pos % num_dims)
///   determine which bit position in that dimension (bit_pos / num_dims)
///   set that bit in result[dim]
/// ```
///
/// # Parameters
///
/// - `code`: The Morton code to decode
/// - `num_dims`: Number of dimensions in the original coordinates
/// - `bits_per_dim`: Number of bits per dimension used during encoding.
///   Values above 16 are accepted as long as the total still fits in 128 bits;
///   code bits that would land at position 16 or higher of a coordinate cannot
///   be represented in a `u16` and are ignored.
///
/// # Returns
///
/// Vector of u16 coordinates (one per dimension). `num_dims == 0` yields an
/// empty vector.
///
/// # Panics
///
/// Panics if num_dims × bits_per_dim exceeds 128 bits
///
/// # Examples
///
/// ```rust
/// use elid::embeddings::morton::{morton_encode, morton_decode};
///
/// // Roundtrip test
/// let coords = vec![5, 3];
/// let code = morton_encode(&coords, 3);
/// let decoded = morton_decode(code, 2, 3);
/// assert_eq!(decoded, coords);
///
/// // Direct decoding
/// let code = 0b11011; // 27
/// let coords = morton_decode(code, 2, 3);
/// assert_eq!(coords, vec![5, 3]); // [0b101, 0b011]
/// ```
pub fn morton_decode(code: u128, num_dims: u8, bits_per_dim: u8) -> Vec<u16> {
    let num_dims = usize::from(num_dims);
    let total_bits = usize::from(bits_per_dim) * num_dims;

    assert!(
        total_bits <= 128,
        "Morton code exceeds 128 bits: {} dims × {} bits = {} bits",
        num_dims,
        bits_per_dim,
        total_bits
    );

    let mut result = vec![0u16; num_dims];

    // Only 16 bits fit into each output coordinate. Shifting a u16 by 16 or
    // more would overflow (panic in debug builds, wrapped shift amount in
    // release builds), so never look past the coordinate width.
    let usable_bits = usize::from(bits_per_dim).min(u16::BITS as usize);

    // Walk the code in the same order the encoder wrote it: bit position
    // outermost, dimension innermost.
    for bit_idx in 0..usable_bits {
        for (dim_idx, coord) in result.iter_mut().enumerate() {
            // pos < usable_bits * num_dims <= total_bits <= 128, so the shift is in range.
            let pos = bit_idx * num_dims + dim_idx;
            if (code >> pos) & 1 == 1 {
                *coord |= 1 << bit_idx;
            }
        }
    }

    result
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `coords` with `bits` bits per dimension and decodes the result
    /// again using the same geometry.
    fn roundtrip(coords: &[u16], bits: u8) -> Vec<u16> {
        // Every test input has at most a dozen dimensions, so this cast cannot truncate.
        morton_decode(morton_encode(coords, bits), coords.len() as u8, bits)
    }

    /// XOR of the Morton codes (10 bits per dimension) of two 2D points; the
    /// set bits are exactly the positions where the two codes disagree.
    fn xor_of_codes(a: [u16; 2], b: [u16; 2]) -> u128 {
        morton_encode(&a, 10) ^ morton_encode(&b, 10)
    }

    // ------------------------------------------------------------------------
    // Encoding
    // ------------------------------------------------------------------------

    #[test]
    fn test_morton_encode_2d_simple() {
        // x = 5 = 0b101 lands on even positions, y = 3 = 0b011 on odd ones:
        //   position: 5 4 3 2 1 0
        //   bit:      0 1 1 0 1 1   => 0b011011 = 27
        let code = morton_encode(&[5u16, 3], 3);
        assert_eq!(
            code, 27,
            "Expected 27 (0b{:06b}), got {} (0b{:06b})",
            27, code, code
        );
    }

    #[test]
    fn test_morton_encode_2d_zeros() {
        assert_eq!(morton_encode(&[0u16, 0], 3), 0);
    }

    #[test]
    fn test_morton_encode_2d_max_values() {
        // 7 = 0b111 is the largest 3-bit value; interleaving two of them
        // sets all six output bits.
        assert_eq!(morton_encode(&[7u16, 7], 3), 0b111111);
    }

    #[test]
    fn test_morton_encode_3d() {
        // [1, 2, 3] at 2 bits each:
        //   low bits  (x, y, z) = (1, 0, 1) -> positions 0..=2 -> 0b101
        //   high bits (x, y, z) = (0, 1, 1) -> positions 3..=5 -> 0b110
        //   combined: 0b110101 = 53
        assert_eq!(morton_encode(&[1u16, 2, 3], 2), 53);
    }

    #[test]
    fn test_morton_encode_10d() {
        // 10 dimensions x 10 bits is the typical ELID configuration; the
        // constant assertion documents at compile time that it fits a u128.
        const _: () = assert!(10 * 10 <= 128, "Should fit in u128");

        let coords: [u16; 10] = [512, 256, 128, 64, 32, 16, 8, 4, 2, 1];
        assert!(morton_encode(&coords, 10) > 0, "Code should be non-zero");
    }

    // ------------------------------------------------------------------------
    // Decoding
    // ------------------------------------------------------------------------

    #[test]
    fn test_morton_decode_2d_simple() {
        // 27 = 0b011011 is the code of [5, 3] at 3 bits per dimension.
        assert_eq!(morton_decode(27, 2, 3), [5u16, 3]);
    }

    #[test]
    fn test_morton_decode_2d_zeros() {
        assert_eq!(morton_decode(0, 2, 3), [0u16, 0]);
    }

    #[test]
    fn test_morton_decode_3d() {
        // 53 is the code of [1, 2, 3] at 2 bits per dimension.
        assert_eq!(morton_decode(53, 3, 2), [1u16, 2, 3]);
    }

    // ------------------------------------------------------------------------
    // Encode followed by decode
    // ------------------------------------------------------------------------

    #[test]
    fn test_roundtrip_2d() {
        let original: [u16; 2] = [5, 3];
        assert_eq!(roundtrip(&original, 3), original);
    }

    #[test]
    fn test_roundtrip_3d() {
        let original: [u16; 3] = [7, 5, 3];
        assert_eq!(roundtrip(&original, 4), original);
    }

    #[test]
    fn test_roundtrip_10d() {
        // Descending powers of two, 512 down to 1: one distinct bit per dimension.
        let original: Vec<u16> = (0..10).rev().map(|shift| 1u16 << shift).collect();
        assert_eq!(roundtrip(&original, 10), original);
    }

    #[test]
    fn test_roundtrip_max_dimensions() {
        // At 10 bits per dimension, 12 dimensions (120 bits) is the most that fits.
        let original = [1u16; 12];
        assert_eq!(roundtrip(&original, 10), original);
    }

    #[test]
    fn test_roundtrip_all_zeros() {
        let original = [0u16; 10];
        assert_eq!(roundtrip(&original, 10), original);
    }

    #[test]
    fn test_roundtrip_all_max() {
        // 1023 is the largest value representable in 10 bits.
        let original = [1023u16; 10];
        assert_eq!(roundtrip(&original, 10), original);
    }

    // ------------------------------------------------------------------------
    // Boundary conditions
    // ------------------------------------------------------------------------

    #[test]
    fn test_single_dimension() {
        // With a single dimension there is nothing to interleave, so the
        // code is simply the coordinate itself.
        let value: u16 = 42;
        let encoded = morton_encode(&[value], 8);
        assert_eq!(morton_decode(encoded, 1, 8), [value]);
        assert_eq!(encoded, u128::from(value));
    }

    #[test]
    fn test_one_bit_per_dimension() {
        let original: [u16; 4] = [1, 0, 1, 0];
        assert_eq!(roundtrip(&original, 1), original);
    }

    #[test]
    #[should_panic(expected = "Morton code exceeds 128 bits")]
    fn test_encode_exceeds_128_bits() {
        // 10 dimensions x 13 bits = 130 bits, two more than a u128 holds.
        morton_encode(&[1u16; 10], 13);
    }

    #[test]
    #[should_panic(expected = "Morton code exceeds 128 bits")]
    fn test_decode_exceeds_128_bits() {
        // 10 dimensions x 13 bits = 130 bits, two more than a u128 holds.
        morton_decode(0, 10, 13);
    }

    // ------------------------------------------------------------------------
    // Hand-computed reference values
    // ------------------------------------------------------------------------

    #[test]
    fn test_known_value_2d_simple() {
        // x = 2 = 0b10, y = 3 = 0b11
        //   low bits:  x -> 0 at position 0, y -> 1 at position 1
        //   high bits: x -> 1 at position 2, y -> 1 at position 3
        //   combined: 0b1110 = 14
        let point: [u16; 2] = [2, 3];
        assert_eq!(morton_encode(&point, 2), 14);
        assert_eq!(morton_decode(14, 2, 2), point);
    }

    #[test]
    fn test_known_value_3d() {
        // x = 0b100, y = 0b010, z = 0b001 at 3 bits each: the only set bits
        // end up at positions 2 (z), 4 (y) and 6 (x) => 4 + 16 + 64 = 84.
        let point: [u16; 3] = [4, 2, 1];
        assert_eq!(morton_encode(&point, 3), 84);
        assert_eq!(morton_decode(84, 3, 3), point);
    }

    // ------------------------------------------------------------------------
    // Locality
    // ------------------------------------------------------------------------

    #[test]
    fn test_locality_adjacent_x() {
        // Neighbours along X must get distinct codes that differ in few bits.
        let diff = xor_of_codes([10, 5], [11, 5]);
        assert!(diff != 0, "Codes should be different");
        assert!(
            diff.count_ones() < 10,
            "Adjacent points should have low Hamming distance"
        );
    }

    #[test]
    fn test_locality_adjacent_y() {
        // Neighbours along Y must get distinct codes that differ in few bits.
        let diff = xor_of_codes([5, 10], [5, 11]);
        assert!(diff != 0, "Codes should be different");
        assert!(
            diff.count_ones() < 10,
            "Adjacent points should have low Hamming distance"
        );
    }

    #[test]
    fn test_locality_diagonal() {
        // A diagonal step changes both coordinates but should still flip
        // only a modest number of code bits.
        let flipped_bits = xor_of_codes([10, 10], [11, 11]).count_ones();
        assert!(
            flipped_bits < 15,
            "Diagonal neighbors should have reasonable locality"
        );
    }

    #[test]
    fn test_locality_far_apart() {
        // Opposite corners of the 10-bit grid should disagree in many bits.
        let flipped_bits = xor_of_codes([0, 0], [1023, 1023]).count_ones();
        assert!(
            flipped_bits > 10,
            "Distant points should have high Hamming distance"
        );
    }

    // ------------------------------------------------------------------------
    // Sweeps over dimension counts and bit widths
    // ------------------------------------------------------------------------

    #[test]
    fn test_various_dimensions() {
        (2..=10usize).for_each(|dims| {
            let coords = vec![100u16; dims];
            assert_eq!(
                roundtrip(&coords, 10),
                coords,
                "Roundtrip failed for {} dimensions",
                dims
            );
        });
    }

    #[test]
    fn test_2d_comprehensive() {
        // (point, bits per dimension) pairs covering several bit widths.
        const CASES: [([u16; 2], u8); 8] = [
            ([0, 0], 1),
            ([1, 0], 2),
            ([0, 1], 2),
            ([1, 1], 2),
            ([2, 2], 2),
            ([3, 3], 2),
            ([15, 15], 4),
            ([255, 255], 8),
        ];

        for (coords, bits) in CASES {
            assert_eq!(
                roundtrip(&coords, bits),
                coords,
                "Roundtrip failed for {:?}",
                coords
            );
        }
    }

    // ------------------------------------------------------------------------
    // Determinism and bit layout
    // ------------------------------------------------------------------------

    #[test]
    fn test_encoding_deterministic() {
        let coords: [u16; 5] = [512, 256, 128, 64, 32];
        assert_eq!(
            morton_encode(&coords, 10),
            morton_encode(&coords, 10),
            "Encoding should be deterministic"
        );
    }

    #[test]
    fn test_bit_pattern_correctness() {
        // x = 0b11, y = 0b10 at 2 bits each:
        //   position: 3 2 1 0
        //   bit:      1 1 0 1   => 0b1101 = 13
        assert_eq!(morton_encode(&[0b11u16, 0b10], 2), 13);
    }
}