hilbert/
normalize.rs

1//! The `normalize` module contains `IntegerDataRange` and `FloatDataRange`.
2//! These structs are used to normalize point coordinates to prepare them for the **Hilbert transform**.
3
4use std::{i32, u32, u64, f64};
5use std::iter::{Iterator, IntoIterator};
6
7
8
9
10/// Smallest power of two such that two raised to that power is greater than or equal to the given number.
11///
12///   - `n` - Find the smallest power of two that equals or exceeds this value.
13///   - Returns - The power, not the number (unlike u64::next_power_of_two, which returns the number, not its log base 2). 
14/// 
15/// Examples: 
16///   - smallest_power_of_two(7) returns 3, because 2^3 = 8 which is **greater than** 7.
17///   - smallest_power_of_two(16) returns 4, because 2^4 = 16 which is **equal to** 16.
18pub fn smallest_power_of_two<N>(n : N) -> usize
19where N : Into<u64>
20{
21    let n64 : u64 = n.into();
22    let mut r = 1_u64;
23    let mut log_two = 0;
24    while r < n64
25    {
26        r <<= 1;
27        log_two += 1;
28    }
29    log_two
30}
31
32/// Find the minimum number of bits required to represent the given number.
33/// 
34/// Examples: 
35///   - bits_required(7) returns 3, because 2^3 = 8 which is **greater than** 7.
36///   - bits_required(16) returns 5, because 2^5 = 32 which is **greater** 16.
37pub fn bits_required<N>(n : N) -> usize
38where N : Into<u64>
39{
40    smallest_power_of_two(n.into() + 1)
41}
42
43
44/// Holds the observed range of values seen in a set of points whose coordinates are 32-bit integers (signed or unsigned).
45/// 
46/// From this information we can determine how to translate the coordinates so that their normalized range has a low of zero.
47/// This permits us to use the fewest number of bits to represent them when constructing a **Hilbert index**.
48/// 
49///   - Use the `normalize` method to adjust a coordinate value into range such that the full precision of the number is preserved.
50///   - Use the `compress` method to both normalize and shrink the coordinate value, 
51///     to reduce the number of bits used per coordinate, at the expense of less precision.
52/// 
53/// **Motivation**. 
54/// 
55///   1. The Hilbert Curve transformation requires _non-negative numbers_, so all values must be translated until 
56///      the lowest is zero. That means that if the lowest value is negative, we shift to the positive direction, 
57///      or contrariwise to the negative direction. 
58///   2. The _execution time_ of the Hilbert transformation is directly proportional to the _number
59///      of bits_ used to represent each value. Thus if we can sacrifice some precision, each value can be
60///      right-shifted by the same number of bits if we wish to compress the range. 
61#[derive(Clone, PartialEq, Debug)]
62pub struct IntegerDataRange {
63    /// Lowest value of any coordinate of any point in a collection of points.
64    pub low : i64,
65
66    /// Highest value of any coordinate of any point in a collection of points.
67    pub high : i64,
68
69    /// Minimum number of bits required to represent a normalized value without loss of information. 
70    /// 
71    /// Examples: 
72    /// 
73    ///    - If `low` is -10 and `high` is 24 then the range is 34 so 6 bits are required to represent all values in that range. 
74    ///    - If `low` is 50 and `high` is 67 then the range is 17 so 5 bits are required to represent all values in that range. 
75    pub bits_required : usize
76}
77
78impl IntegerDataRange {
79
80    /// Create an `IntegerDataRange` without reference to particular data. 
81    pub fn new<I>(low_i : I, high_i : I) -> Self 
82    where I : Into<i64>
83    {
84        let low = low_i.into();
85        let high = high_i.into();
86        IntegerDataRange { 
87            low, 
88            high,
89            bits_required : bits_required((high - low) as u64)
90        }
91    }
92
93    /// Study all `i32` coordinates in all points to find the minimum and maximum values. 
94    pub fn from_i32<I>(points : &[I]) -> Self 
95    // The "for<'a>" below is "higher-ranked lifetime" notation. Got it from stackoverflow. Confused about what it does but it is needed!
96    where
97        for<'a> &'a I: IntoIterator<Item = &'a i32>, 
98    {
99        let mut low = i32::MAX;
100        let mut high = i32::MIN;
101        for point in points.iter() {
102            for coordinate in point.into_iter().map(|c| *c) {
103                if coordinate > high { high = coordinate; }
104                if coordinate < low { low = coordinate; }
105            }
106        }
107        Self::new(low, high)
108    }
109
110    /// Study all `u32` coordinates in all points to find the minimum and maximum values. 
111    pub fn from_u32<I>(points : &[I]) -> Self 
112    where
113        for<'a> &'a I: IntoIterator<Item = &'a u32>,
114    {
115        let mut low = u32::MAX as i64;
116        let mut high = 0_i64;
117        for point in points.iter() {
118            for coordinate in point.into_iter().map(|c| *c as i64) {
119                if coordinate > high { high = coordinate; }
120                if coordinate < low { low = coordinate; }
121            }
122        }
123        Self::new(low, high)
124    }
125
126    /// Range from low to high value.
127    pub fn range(&self) -> u32 { (self.high - self.low) as u32 }
128
129    /// Normalize an integer convertible to an i64, shifting it enough so that the minimum value found in any point
130    /// is shifted to zero and the maximum value is shifted to `range`, and using the full number of bits required for the range.
131    pub fn normalize<I>(&self, coordinate : I) -> u32 
132    where I : Into<i64>
133    {
134        (coordinate.into() - self.low) as u32
135    }
136
137    /// Normalize a `coordinate` value, shifting it enough so that the minimum value found in any point
138    /// is shifted to zero and the maximum value is shifted to `range`, then optionally compressing the range by bit shifting
139    /// such that no more than the given number of bits are required for the largest value.
140    pub fn compress<I>(&self, coordinate : I, bits_allocated : usize) -> u32 
141    where I : Into<i64>
142    {
143        let uncompressed = self.normalize(coordinate);
144        if bits_allocated < self.bits_required { uncompressed >> (self.bits_required - bits_allocated) }
145        else { uncompressed }
146    }
147
148}
149
150/// The observed range of values seen in a set of points whose coordinates are signed 64-bit floats.
151/// 
152/// From this information we can determine how to translate the coordinates so that their normalized range has a low of zero.
153/// This permits us to use the fewest number of bits to represent them when constructing a **Hilbert index**.
154/// 
155/// **Motivation**. 
156/// 
157///   1. The Hilbert Curve transformation requires _non-negative numbers_, so all values must be translated until 
158///      the lowest is zero. That means that if the lowest value is negative, we shift to the positive direction, 
159///      or contrariwise to the negative direction. 
160///   2. The _execution time_ of the Hilbert transformation is directly proportional to the _number
161///      of bits_ used to represent each value. Thus if we can sacrifice some precision, each value can be
162///      multiplied by a scale factor and rounded to the nearest integer to compress the value (and sacrifice information). 
163#[derive(Clone, PartialEq, Debug)]
164pub struct FloatDataRange {
165    /// Lowest value of any coordinate of any point in a collection of points.
166    pub low : f64,
167
168
169    /// Highest value of any coordinate of any point in a collection of points.
170    pub high : f64,
171
172    /// Multiplier to apply before rounding to an integer value, sacrificing some precision. 
173    /// For example, if you want to encode values such that you preserve precision to the hundredth place, `scale` should be 100.
174    pub scale : f64, 
175
176    /// Minimum number of bits required to represent a normalized value without loss of information for the given scale factor. 
177    /// 
178    /// Example: 
179    /// 
180    ///    - If `low` is -5.02 and `high` is 3.13 then the range is 8.15. If `scale` is 100, the range becomes 815, 
181    /// so 10 bits are required to represent all values in that range. 
182    pub bits_required : usize
183}
184
185impl FloatDataRange {
186
187    /// Create a `FloatDataRange` without reference to particular data. 
188    pub fn new(low : f64, high : f64, scale : f64) -> Self {
189        let scaled_range = ((high - low) * scale).ceil() as u32;
190        FloatDataRange { 
191            low, 
192            high,
193            scale,
194            bits_required : bits_required(scaled_range)
195        }
196    }
197
198    /// Study all `f64` coordinates in all points to find the minimum and maximum values. 
199    pub fn from_f64<I>(points : &[I], scale : f64) -> Self 
200    // The "for<'a>" below is "higher-ranked lifetime" notation. Got it from stackoverflow. Confused about what it does but it is needed!
201    where
202        for<'a> &'a I: IntoIterator<Item = &'a f64>, 
203    {
204        let mut low = f64::MAX;
205        let mut high = f64::MIN;
206        for point in points.iter() {
207            for coordinate in point.into_iter().map(|c| *c) {
208                if coordinate > high { high = coordinate; }
209                if coordinate < low { low = coordinate; }
210            }
211        }
212        FloatDataRange::new(low, high, scale)
213    }
214
215    /// Range from low to high value.
216    pub fn range(&self) -> f64 { self.high - self.low }
217
218    /// Normalize an `f64` `coordinate` value, shifting it enough so that the minimum value found in any point
219    /// is shifted to zero and the maximum value is shifted to `range`, and using the full number of bits required for the range
220    /// multiplied by the scale.
221    pub fn normalize(&self, coordinate : f64) -> u32 {
222        ((coordinate - self.low) * self.scale).ceil() as u32
223    }
224
225    /// Normalize an `f64` `coordinate` value, shifting it enough so that the minimum value found in any point
226    /// is shifted to zero and the maximum value is shifted to `range`, then optionally compressing the range by bit shifting
227    /// such that no more than the given number of bits are required for the largest value.
228    pub fn compress(&self, coordinate : f64, bits_allocated : usize) -> u32 {
229        let uncompressed = self.normalize(coordinate);
230        if bits_allocated < self.bits_required { uncompressed >> (self.bits_required - bits_allocated) }
231        else { uncompressed }
232    }
233
234}
235
236#[cfg(test)]
237mod tests {
238
239    #[allow(unused_imports)]
240    use spectral::prelude::*;
241    use super::{IntegerDataRange, FloatDataRange};
242
243    #[test]
244    fn from_i32() {
245        let points = test_points_i32();
246        let actual_range = IntegerDataRange::from_i32(&points);
247        let expected_range = IntegerDataRange::new(-100, 100);
248        asserting("Range correct").that(&actual_range).is_equal_to(expected_range);
249    }
250
251    #[test]
252    fn normalize_i32() {
253        let points = test_points_i32();
254        let range = IntegerDataRange::from_i32(&points);
255        let actual_normalized = range.normalize(-16);
256        let expected_normalized = 84;
257        asserting("Normalized value correct").that(&actual_normalized).is_equal_to(expected_normalized);
258    }    
259
260    #[test]
261    fn compress_i32() {
262        let points = test_points_i32();
263        let range = IntegerDataRange::from_i32(&points);
264        let actual_compressed = range.compress(-16, 7);
265        let expected_compressed = 42;
266        asserting("Compressed value correct").that(&actual_compressed).is_equal_to(expected_compressed);
267    }
268
269    #[test]
270    fn smallest_power_of_two() {
271        let in_out_pairs = vec![(0_u64,0), (1,0), (2,1), (3,2), (4,2), (5,3), (6,3), (7,3), (8,3), (9,4)];
272        for (n, expected) in in_out_pairs {
273            let actual = super::smallest_power_of_two(n);
274            asserting(&format!("n = {}, actual = {}, expected = {}", n, actual, expected)).that(&actual).is_equal_to(expected);
275        }
276    }
277
278    #[test]
279    fn from_f64() {
280        let points = test_points_f64();
281        let actual_range = FloatDataRange::from_f64(&points, 10.0);
282        let expected_range = FloatDataRange::new(-100.0, 100.19, 10.0);
283        asserting("Range correct").that(&actual_range).is_equal_to(expected_range);
284    }
285
286    #[test]
287    fn normalize_f64() {
288        let points = test_points_f64();
289        let range = FloatDataRange::from_f64(&points, 10.0);
290        let actual_normalized = range.normalize(-16.0);
291        let expected_normalized = 840;
292        asserting("Normalized value correct").that(&actual_normalized).is_equal_to(expected_normalized);
293    }
294
295    /// Compress a value in a range that requires 11 bits down to 7 bits, forcing a division by 16 and loss of precision. 
296    #[test]
297    fn compress_f64() {
298        let points = test_points_f64();
299        let range = FloatDataRange::from_f64(&points, 10.0);
300        let actual_compressed = range.compress(-16.0, 7);
301        let expected_compressed = 52;
302        asserting("Compressed value correct").that(&actual_compressed).is_equal_to(expected_compressed);
303    }    
304
305    fn test_points_i32() -> Vec<Vec<i32>> {
306       vec![
307           vec![-10, 5, 3],
308           vec![-4, 20, 100],
309           vec![42, -100, 0]
310       ]
311    }
312
313    fn test_points_f64() -> Vec<Vec<f64>> {
314       // Range is 100.9 - -100.0 => 200.9
315       // If scaled by 10, the range is 2009. 
316       vec![
317           vec![-10.5, 5.27, 3.66],
318           vec![-4.802, 20.2, 100.19],
319           vec![42.0, -100.0, 0.0]
320       ]
321    }
322
323}