cloudini 0.3.1 - Docs.rs

//! Per-field encoding logic for the Cloudini format.
//!
//! The encoder selects a [`FieldEncoder`] variant for each field based on its type
//! and the requested [`crate::EncodingOptions`]:
//!
//! | Condition | Variant |
//! |-----------|---------|
//! | `NONE` mode, or INT8/UINT8 | `Copy` — raw bytes |
//! | INT16–UINT64 | `Int` — delta + zigzag varint |
//! | `Lossy` mode, FLOAT32 + resolution, exactly 3 or 4 leading fields | `FloatNLossy` — SIMD-style batch quantisation |
//! | `Lossy` mode, FLOAT32 + resolution, other positions | `Float32Lossy` — single-field quantisation |
//! | FLOAT32 otherwise (no resolution, or mode is not `Lossy`) | `Copy` — raw bytes |
//! | `Lossy` mode, FLOAT64 + resolution | `Float64Lossy` — double-precision quantisation |
//! | FLOAT64 otherwise (no resolution, or mode is not `Lossy`) | `Float64Xor` — lossless XOR with previous bits |

use crate::types::FieldType;
use crate::varint::encode_varint64;

/// Load the integer field stored at `offset` in `point` and widen it to `i64`
/// (mirrors the C++ `ToInt64<T>` helper).
///
/// Bytes are read as little-endian. Signed types are sign-extended and unsigned
/// types are zero-extended; `Uint64` is reinterpreted bit-for-bit, so values above
/// `i64::MAX` come out negative. Every non-integer `field_type` yields `0`.
///
/// # Panics
///
/// Panics if `offset` lies past the end of `point`, or if `point` does not hold
/// the whole field starting at `offset`.
fn read_as_i64(point: &[u8], offset: usize, field_type: FieldType) -> i64 {
    /// Copy the first `N` bytes of `src` into a fixed-size array.
    fn leading<const N: usize>(src: &[u8]) -> [u8; N] {
        let mut raw = [0u8; N];
        raw.copy_from_slice(&src[..N]);
        raw
    }

    // Sliced up front so an out-of-range offset panics for every field type.
    let tail = &point[offset..];
    match field_type {
        FieldType::Int8 => i64::from(i8::from_le_bytes(leading(tail))),
        FieldType::Uint8 => i64::from(u8::from_le_bytes(leading(tail))),
        FieldType::Int16 => i64::from(i16::from_le_bytes(leading(tail))),
        FieldType::Uint16 => i64::from(u16::from_le_bytes(leading(tail))),
        FieldType::Int32 => i64::from(i32::from_le_bytes(leading(tail))),
        FieldType::Uint32 => i64::from(u32::from_le_bytes(leading(tail))),
        FieldType::Int64 => i64::from_le_bytes(leading(tail)),
        // Bit-for-bit reinterpretation: large unsigned values become negative.
        FieldType::Uint64 => u64::from_le_bytes(leading(tail)) as i64,
        _ => 0,
    }
}

/// A stateful encoder for one point field (or a batch of consecutive float fields).
///
/// Call [`FieldEncoder::reset`] at the start of each chunk to clear delta state.
/// Call [`FieldEncoder::encode`] once per point.
pub enum FieldEncoder {
    /// Raw byte copy of `size` bytes starting at `offset` — used for INT8/UINT8 and
    /// unquantised floats.
    Copy { offset: usize, size: usize },
    /// Delta + zigzag varint — used for all integer types in non-NONE mode.
    Int {
        offset: usize,
        field_type: FieldType,
        /// Value read from the previous point (0 after a reset).
        prev: i64,
    },
    /// Lossy FLOAT32: `round(v * (1/resolution))` → i64 delta → varint.
    Float32Lossy {
        offset: usize,
        /// Reciprocal of the field resolution, stored as `f32`.
        multiplier: f32,
        /// Quantised value of the previous point (0 after a reset or a NaN).
        prev: i64,
    },
    /// Lossy FLOAT64: `round(v * (1/resolution))` → i64 delta → varint.
    Float64Lossy {
        offset: usize,
        /// Reciprocal of the field resolution, stored as `f64`.
        multiplier: f64,
        /// Quantised value of the previous point (0 after a reset or a NaN).
        prev: i64,
    },
    /// Lossless FLOAT64: XOR current bits with previous bits → store 8 raw bytes.
    Float64Xor { offset: usize, prev_bits: u64 },
    /// Batch lossy encoder for 3 or 4 consecutive FLOAT32 fields with resolution.
    ///
    /// Uses `i32` delta accumulators (matching the reference SIMD implementation) so
    /// that large pointclouds with many accumulated deltas wrap identically.
    FloatNLossy {
        offsets: [usize; 4],
        /// Reciprocal of the resolution for each component
        multipliers: [f32; 4],
        /// Quantised values of the previous point, one per component
        prev: [i32; 4],
        /// Number of components actually used: 3 or 4
        count: usize,
    },
}

impl FieldEncoder {
    /// Single byte written in place of a varint when the input value is NaN.
    const NAN_SENTINEL: u8 = 0;

    /// Reset delta/XOR state. Must be called at the start of every chunk.
    pub fn reset(&mut self) {
        match self {
            FieldEncoder::Int { prev, .. }
            | FieldEncoder::Float32Lossy { prev, .. }
            | FieldEncoder::Float64Lossy { prev, .. } => *prev = 0,
            FieldEncoder::Float64Xor { prev_bits, .. } => *prev_bits = 0,
            FieldEncoder::FloatNLossy { prev, .. } => *prev = [0i32; 4],
            // `Copy` carries no state between points.
            FieldEncoder::Copy { .. } => {}
        }
    }

    /// Encode the field(s) for one point into `out`. Returns the number of bytes written.
    ///
    /// `out` must have at least 10 bytes of space (worst-case varint) per field handled
    /// by this encoder, 8 bytes for [`FieldEncoder::Float64Xor`], or `size` bytes for
    /// [`FieldEncoder::Copy`].
    ///
    /// Deltas are computed with wrapping arithmetic, so extreme inputs (for example
    /// `i64::MAX` followed by `i64::MIN`, or `+inf` followed by `-inf` after the
    /// saturating float-to-int cast) never cause an overflow panic. A decoder that
    /// accumulates deltas with wrapping addition recovers the same values.
    ///
    /// A NaN float is written as a single `0` byte and resets that field's previous
    /// value to 0.
    ///
    /// # Panics
    ///
    /// Panics if `point` does not contain every field this encoder reads, or if `out`
    /// is too small for the bytes written.
    pub fn encode(&mut self, point: &[u8], out: &mut [u8]) -> usize {
        match self {
            FieldEncoder::Copy { offset, size } => {
                out[..*size].copy_from_slice(&point[*offset..*offset + *size]);
                *size
            }

            FieldEncoder::Int {
                offset,
                field_type,
                prev,
            } => {
                let value = read_as_i64(point, *offset, *field_type);
                Self::encode_delta(value, prev, out)
            }

            FieldEncoder::Float32Lossy {
                offset,
                multiplier,
                prev,
            } => {
                let val = f32::from_le_bytes(point[*offset..*offset + 4].try_into().unwrap());
                if val.is_nan() {
                    out[0] = Self::NAN_SENTINEL;
                    *prev = 0;
                    1
                } else {
                    // `as i64` saturates for out-of-range values (including ±inf).
                    let quantized = (val * *multiplier).round() as i64;
                    Self::encode_delta(quantized, prev, out)
                }
            }

            FieldEncoder::Float64Lossy {
                offset,
                multiplier,
                prev,
            } => {
                let val = f64::from_le_bytes(point[*offset..*offset + 8].try_into().unwrap());
                if val.is_nan() {
                    out[0] = Self::NAN_SENTINEL;
                    *prev = 0;
                    1
                } else {
                    // `as i64` saturates for out-of-range values (including ±inf).
                    let quantized = (val * *multiplier).round() as i64;
                    Self::encode_delta(quantized, prev, out)
                }
            }

            FieldEncoder::Float64Xor { offset, prev_bits } => {
                let current = u64::from_le_bytes(point[*offset..*offset + 8].try_into().unwrap());
                let residual = current ^ *prev_bits;
                *prev_bits = current;
                out[..8].copy_from_slice(&residual.to_le_bytes());
                8
            }

            FieldEncoder::FloatNLossy {
                offsets,
                multipliers,
                prev,
                count,
            } => {
                let n = *count;
                // Running write position inside `out`.
                let mut ptr = 0usize;

                for i in 0..n {
                    let val =
                        f32::from_le_bytes(point[offsets[i]..offsets[i] + 4].try_into().unwrap());
                    if val.is_nan() {
                        out[ptr] = Self::NAN_SENTINEL;
                        prev[i] = 0;
                        ptr += 1;
                    } else {
                        // i32 delta matches the reference SIMD path (Vector4i accumulator)
                        let quantized = (val * multipliers[i]).round() as i32;
                        let delta = quantized.wrapping_sub(prev[i]);
                        prev[i] = quantized;

                        ptr += encode_varint64(delta as i64, &mut out[ptr..]);
                    }
                }
                ptr
            }
        }
    }

    /// Write `value - *prev` (wrapping) as a varint into `out`, remember `value` as the
    /// new previous value, and return the number of bytes written.
    fn encode_delta(value: i64, prev: &mut i64, out: &mut [u8]) -> usize {
        // Plain `-` would panic on overflow in builds with overflow checks enabled.
        let diff = value.wrapping_sub(*prev);
        *prev = value;
        encode_varint64(diff, out)
    }
}

/// Build the ordered list of field encoders for a given set of fields and encoding mode.
///
/// Mirrors the C++ `PointcloudEncoder` constructor logic:
/// - If the first 3 or 4 fields are all `FLOAT32` with a resolution and the mode is `Lossy`,
///   they are handled by a single [`FieldEncoder::FloatNLossy`].
/// - All remaining fields are encoded individually.
pub fn build_encoders(
    fields: &[crate::types::PointField],
    encoding_opt: crate::types::EncodingOptions,
) -> Vec<FieldEncoder> {
    use crate::types::EncodingOptions;

    let mut encoders: Vec<FieldEncoder> = Vec::new();
    let mut start_index = 0;

    if encoding_opt == EncodingOptions::None {
        for field in fields {
            encoders.push(FieldEncoder::Copy {
                offset: field.offset as usize,
                size: field.field_type.size_of(),
            });
        }
        return encoders;
    }

    // Try FloatNLossy for the first run of FLOAT32+resolution fields (3 or 4 only)
    if encoding_opt == EncodingOptions::Lossy {
        let floats_count = fields
            .iter()
            .take_while(|f| f.field_type == FieldType::Float32 && f.resolution.is_some())
            .count();

        if floats_count == 3 || floats_count == 4 {
            let mut offsets = [0usize; 4];
            let mut multipliers = [0.0f32; 4];
            for i in 0..floats_count {
                offsets[i] = fields[i].offset as usize;
                multipliers[i] = 1.0f32 / fields[i].resolution.unwrap();
            }
            encoders.push(FieldEncoder::FloatNLossy {
                offsets,
                multipliers,
                prev: [0i32; 4],
                count: floats_count,
            });
            start_index = floats_count;
        }
    }

    for field in &fields[start_index..] {
        let offset = field.offset as usize;
        let encoder = match field.field_type {
            FieldType::Float32 => {
                if encoding_opt == EncodingOptions::Lossy {
                    if let Some(res) = field.resolution {
                        // Use double-precision reciprocal then truncate to f32 (matches C++)
                        FieldEncoder::Float32Lossy {
                            offset,
                            multiplier: (1.0f64 / res as f64) as f32,
                            prev: 0,
                        }
                    } else {
                        FieldEncoder::Copy { offset, size: 4 }
                    }
                } else {
                    FieldEncoder::Copy { offset, size: 4 }
                }
            }
            FieldType::Float64 => {
                if encoding_opt == EncodingOptions::Lossy {
                    if let Some(res) = field.resolution {
                        FieldEncoder::Float64Lossy {
                            offset,
                            multiplier: 1.0f64 / res as f64,
                            prev: 0,
                        }
                    } else {
                        FieldEncoder::Float64Xor {
                            offset,
                            prev_bits: 0,
                        }
                    }
                } else {
                    FieldEncoder::Float64Xor {
                        offset,
                        prev_bits: 0,
                    }
                }
            }
            FieldType::Int16 => FieldEncoder::Int {
                offset,
                field_type: FieldType::Int16,
                prev: 0,
            },
            FieldType::Uint16 => FieldEncoder::Int {
                offset,
                field_type: FieldType::Uint16,
                prev: 0,
            },
            FieldType::Int32 => FieldEncoder::Int {
                offset,
                field_type: FieldType::Int32,
                prev: 0,
            },
            FieldType::Uint32 => FieldEncoder::Int {
                offset,
                field_type: FieldType::Uint32,
                prev: 0,
            },
            FieldType::Int64 => FieldEncoder::Int {
                offset,
                field_type: FieldType::Int64,
                prev: 0,
            },
            FieldType::Uint64 => FieldEncoder::Int {
                offset,
                field_type: FieldType::Uint64,
                prev: 0,
            },
            FieldType::Int8 | FieldType::Uint8 => FieldEncoder::Copy { offset, size: 1 },
            _ => panic!("Unsupported field type"),
        };
        encoders.push(encoder);
    }

    encoders
}