Skip to main content

nodedb_array/coord/
normalize.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Per-dimension coordinate normalization.
4//!
5//! Space-filling curves (Hilbert, Z-order) operate on equal-width
6//! unsigned integer coordinates. This module maps each [`CoordValue`]
7//! into a `u64` in `[0, 2^bits)` relative to the dimension's
8//! [`Domain`]. The bit budget is shared across dims so the total prefix
9//! fits in 64 bits.
10
11use crate::error::{ArrayError, ArrayResult};
12use crate::schema::dim_spec::DimType;
13use crate::schema::{ArraySchema, DimSpec};
14use crate::types::coord::value::CoordValue;
15use crate::types::domain::DomainBound;
16
17/// Maximum dims supported by ND Hilbert in this implementation.
18pub const MAX_DIMS: usize = 16;
19
20/// Total bit budget for the combined space-filling-curve prefix.
21pub const PREFIX_BITS: u32 = 64;
22
23/// Bits per dim for an `n`-dim schema, dividing [`PREFIX_BITS`] evenly
24/// (truncated). Returns at least 1 bit per dim.
25pub fn bits_per_dim(n_dims: usize) -> u32 {
26    if n_dims == 0 {
27        return 0;
28    }
29    let raw = PREFIX_BITS / n_dims as u32;
30    raw.max(1)
31}
32
33/// Normalize one cell coordinate into per-dim integer coordinates in
34/// `[0, 2^bits)`. The output `Vec` has the same arity as the schema.
35pub fn normalize_coord(
36    schema: &ArraySchema,
37    components: &[CoordValue],
38    bits: u32,
39) -> ArrayResult<Vec<u64>> {
40    if components.len() != schema.arity() {
41        return Err(ArrayError::CoordArityMismatch {
42            array: schema.name.clone(),
43            expected: schema.arity(),
44            got: components.len(),
45        });
46    }
47    let mut out = Vec::with_capacity(components.len());
48    for (dim, value) in schema.dims.iter().zip(components.iter()) {
49        out.push(normalize_one(&schema.name, dim, value, bits)?);
50    }
51    Ok(out)
52}
53
54fn normalize_one(array: &str, dim: &DimSpec, value: &CoordValue, bits: u32) -> ArrayResult<u64> {
55    let max = if bits >= 64 {
56        u64::MAX
57    } else {
58        (1u64 << bits) - 1
59    };
60    match (&dim.dtype, value, &dim.domain.lo, &dim.domain.hi) {
61        (DimType::Int64, CoordValue::Int64(v), DomainBound::Int64(lo), DomainBound::Int64(hi))
62        | (
63            DimType::TimestampMs,
64            CoordValue::TimestampMs(v),
65            DomainBound::TimestampMs(lo),
66            DomainBound::TimestampMs(hi),
67        ) => map_int(array, dim, *v, *lo, *hi, max),
68        (
69            DimType::Float64,
70            CoordValue::Float64(v),
71            DomainBound::Float64(lo),
72            DomainBound::Float64(hi),
73        ) => map_float(array, dim, *v, *lo, *hi, max),
74        (
75            DimType::String,
76            CoordValue::String(v),
77            DomainBound::String(_),
78            DomainBound::String(_),
79        ) => Ok(map_string(v, max)),
80        _ => Err(ArrayError::CoordOutOfDomain {
81            array: array.to_string(),
82            dim: dim.name.clone(),
83            detail: "coord variant does not match dim dtype".to_string(),
84        }),
85    }
86}
87
88fn map_int(array: &str, dim: &DimSpec, v: i64, lo: i64, hi: i64, max: u64) -> ArrayResult<u64> {
89    if v < lo || v > hi {
90        return Err(ArrayError::CoordOutOfDomain {
91            array: array.to_string(),
92            dim: dim.name.clone(),
93            detail: format!("value {v} outside [{lo}, {hi}]"),
94        });
95    }
96    let span = (hi as i128) - (lo as i128);
97    if span <= 0 {
98        return Ok(0);
99    }
100    let off = (v as i128) - (lo as i128);
101    // Linear scale `off` ∈ [0, span] → [0, max].
102    let scaled = ((off as u128) * (max as u128)) / (span as u128);
103    Ok(scaled as u64)
104}
105
106fn map_float(array: &str, dim: &DimSpec, v: f64, lo: f64, hi: f64, max: u64) -> ArrayResult<u64> {
107    if !v.is_finite() || v < lo || v > hi {
108        return Err(ArrayError::CoordOutOfDomain {
109            array: array.to_string(),
110            dim: dim.name.clone(),
111            detail: format!("value {v} outside [{lo}, {hi}] or non-finite"),
112        });
113    }
114    let span = hi - lo;
115    if span <= 0.0 {
116        return Ok(0);
117    }
118    let frac = ((v - lo) / span).clamp(0.0, 1.0);
119    Ok((frac * (max as f64)) as u64)
120}
121
122/// String dims fall back to a deterministic hash bucket. Hilbert/Z-order
123/// over strings is inherently lossy — preserves equality, not ordering.
124/// Future work may swap in dictionary-based sort-preserving codes.
125fn map_string(v: &str, max: u64) -> u64 {
126    super::string_hash::hash_string_masked(v, max)
127}
128
129#[cfg(test)]
130mod tests {
131    use super::*;
132    use crate::schema::ArraySchemaBuilder;
133    use crate::schema::attr_spec::{AttrSpec, AttrType};
134    use crate::types::domain::Domain;
135
136    fn schema_2d() -> ArraySchema {
137        ArraySchemaBuilder::new("g")
138            .dim(DimSpec::new(
139                "x",
140                DimType::Int64,
141                Domain::new(DomainBound::Int64(0), DomainBound::Int64(100)),
142            ))
143            .dim(DimSpec::new(
144                "y",
145                DimType::Int64,
146                Domain::new(DomainBound::Int64(0), DomainBound::Int64(100)),
147            ))
148            .attr(AttrSpec::new("v", AttrType::Int64, false))
149            .tile_extents(vec![10, 10])
150            .build()
151            .unwrap()
152    }
153
154    #[test]
155    fn bits_per_dim_distributes_64() {
156        assert_eq!(bits_per_dim(1), 64);
157        assert_eq!(bits_per_dim(2), 32);
158        assert_eq!(bits_per_dim(4), 16);
159        assert_eq!(bits_per_dim(16), 4);
160    }
161
162    #[test]
163    fn bits_per_dim_floor_one() {
164        assert_eq!(bits_per_dim(100), 1);
165    }
166
167    #[test]
168    fn normalize_int_lo_maps_to_zero() {
169        let s = schema_2d();
170        let n = normalize_coord(&s, &[CoordValue::Int64(0), CoordValue::Int64(0)], 8).unwrap();
171        assert_eq!(n, vec![0, 0]);
172    }
173
174    #[test]
175    fn normalize_int_hi_maps_to_max() {
176        let s = schema_2d();
177        let n = normalize_coord(&s, &[CoordValue::Int64(100), CoordValue::Int64(100)], 8).unwrap();
178        assert_eq!(n, vec![255, 255]);
179    }
180
181    #[test]
182    fn normalize_rejects_out_of_domain() {
183        let s = schema_2d();
184        let r = normalize_coord(&s, &[CoordValue::Int64(101), CoordValue::Int64(0)], 8);
185        assert!(r.is_err());
186    }
187
188    #[test]
189    fn normalize_rejects_arity_mismatch() {
190        let s = schema_2d();
191        let r = normalize_coord(&s, &[CoordValue::Int64(0)], 8);
192        assert!(r.is_err());
193    }
194
195    #[test]
196    fn normalize_rejects_type_mismatch() {
197        let s = schema_2d();
198        let r = normalize_coord(&s, &[CoordValue::Float64(0.0), CoordValue::Int64(0)], 8);
199        assert!(r.is_err());
200    }
201}