hilbert/normalize.rs
1//! The `normalize` module contains `IntegerDataRange` and `FloatDataRange`.
2//! These structs are used to normalize point coordinates to prepare them for the **Hilbert transform**.
3
4use std::{i32, u32, u64, f64};
5use std::iter::{Iterator, IntoIterator};
6
7
8
9
10/// Smallest power of two such that two raised to that power is greater than or equal to the given number.
11///
12/// - `n` - Find the smallest power of two that equals or exceeds this value.
13/// - Returns - The power, not the number (unlike u64::next_power_of_two, which returns the number, not its log base 2).
14///
15/// Examples:
16/// - smallest_power_of_two(7) returns 3, because 2^3 = 8 which is **greater than** 7.
17/// - smallest_power_of_two(16) returns 4, because 2^4 = 16 which is **equal to** 16.
18pub fn smallest_power_of_two<N>(n : N) -> usize
19where N : Into<u64>
20{
21 let n64 : u64 = n.into();
22 let mut r = 1_u64;
23 let mut log_two = 0;
24 while r < n64
25 {
26 r <<= 1;
27 log_two += 1;
28 }
29 log_two
30}
31
32/// Find the minimum number of bits required to represent the given number.
33///
34/// Examples:
35/// - bits_required(7) returns 3, because 2^3 = 8 which is **greater than** 7.
36/// - bits_required(16) returns 5, because 2^5 = 32 which is **greater** 16.
37pub fn bits_required<N>(n : N) -> usize
38where N : Into<u64>
39{
40 smallest_power_of_two(n.into() + 1)
41}
42
43
44/// Holds the observed range of values seen in a set of points whose coordinates are 32-bit integers (signed or unsigned).
45///
46/// From this information we can determine how to translate the coordinates so that their normalized range has a low of zero.
47/// This permits us to use the fewest number of bits to represent them when constructing a **Hilbert index**.
48///
49/// - Use the `normalize` method to adjust a coordinate value into range such that the full precision of the number is preserved.
50/// - Use the `compress` method to both normalize and shrink the coordinate value,
51/// to reduce the number of bits used per coordinate, at the expense of less precision.
52///
53/// **Motivation**.
54///
55/// 1. The Hilbert Curve transformation requires _non-negative numbers_, so all values must be translated until
56/// the lowest is zero. That means that if the lowest value is negative, we shift to the positive direction,
57/// or contrariwise to the negative direction.
58/// 2. The _execution time_ of the Hilbert transformation is directly proportional to the _number
59/// of bits_ used to represent each value. Thus if we can sacrifice some precision, each value can be
60/// right-shifted by the same number of bits if we wish to compress the range.
61#[derive(Clone, PartialEq, Debug)]
62pub struct IntegerDataRange {
63 /// Lowest value of any coordinate of any point in a collection of points.
64 pub low : i64,
65
66 /// Highest value of any coordinate of any point in a collection of points.
67 pub high : i64,
68
69 /// Minimum number of bits required to represent a normalized value without loss of information.
70 ///
71 /// Examples:
72 ///
73 /// - If `low` is -10 and `high` is 24 then the range is 34 so 6 bits are required to represent all values in that range.
74 /// - If `low` is 50 and `high` is 67 then the range is 17 so 5 bits are required to represent all values in that range.
75 pub bits_required : usize
76}
77
78impl IntegerDataRange {
79
80 /// Create an `IntegerDataRange` without reference to particular data.
81 pub fn new<I>(low_i : I, high_i : I) -> Self
82 where I : Into<i64>
83 {
84 let low = low_i.into();
85 let high = high_i.into();
86 IntegerDataRange {
87 low,
88 high,
89 bits_required : bits_required((high - low) as u64)
90 }
91 }
92
93 /// Study all `i32` coordinates in all points to find the minimum and maximum values.
94 pub fn from_i32<I>(points : &[I]) -> Self
95 // The "for<'a>" below is "higher-ranked lifetime" notation. Got it from stackoverflow. Confused about what it does but it is needed!
96 where
97 for<'a> &'a I: IntoIterator<Item = &'a i32>,
98 {
99 let mut low = i32::MAX;
100 let mut high = i32::MIN;
101 for point in points.iter() {
102 for coordinate in point.into_iter().map(|c| *c) {
103 if coordinate > high { high = coordinate; }
104 if coordinate < low { low = coordinate; }
105 }
106 }
107 Self::new(low, high)
108 }
109
110 /// Study all `u32` coordinates in all points to find the minimum and maximum values.
111 pub fn from_u32<I>(points : &[I]) -> Self
112 where
113 for<'a> &'a I: IntoIterator<Item = &'a u32>,
114 {
115 let mut low = u32::MAX as i64;
116 let mut high = 0_i64;
117 for point in points.iter() {
118 for coordinate in point.into_iter().map(|c| *c as i64) {
119 if coordinate > high { high = coordinate; }
120 if coordinate < low { low = coordinate; }
121 }
122 }
123 Self::new(low, high)
124 }
125
126 /// Range from low to high value.
127 pub fn range(&self) -> u32 { (self.high - self.low) as u32 }
128
129 /// Normalize an integer convertible to an i64, shifting it enough so that the minimum value found in any point
130 /// is shifted to zero and the maximum value is shifted to `range`, and using the full number of bits required for the range.
131 pub fn normalize<I>(&self, coordinate : I) -> u32
132 where I : Into<i64>
133 {
134 (coordinate.into() - self.low) as u32
135 }
136
137 /// Normalize a `coordinate` value, shifting it enough so that the minimum value found in any point
138 /// is shifted to zero and the maximum value is shifted to `range`, then optionally compressing the range by bit shifting
139 /// such that no more than the given number of bits are required for the largest value.
140 pub fn compress<I>(&self, coordinate : I, bits_allocated : usize) -> u32
141 where I : Into<i64>
142 {
143 let uncompressed = self.normalize(coordinate);
144 if bits_allocated < self.bits_required { uncompressed >> (self.bits_required - bits_allocated) }
145 else { uncompressed }
146 }
147
148}
149
150/// The observed range of values seen in a set of points whose coordinates are signed 64-bit floats.
151///
152/// From this information we can determine how to translate the coordinates so that their normalized range has a low of zero.
153/// This permits us to use the fewest number of bits to represent them when constructing a **Hilbert index**.
154///
155/// **Motivation**.
156///
157/// 1. The Hilbert Curve transformation requires _non-negative numbers_, so all values must be translated until
158/// the lowest is zero. That means that if the lowest value is negative, we shift to the positive direction,
159/// or contrariwise to the negative direction.
160/// 2. The _execution time_ of the Hilbert transformation is directly proportional to the _number
161/// of bits_ used to represent each value. Thus if we can sacrifice some precision, each value can be
162/// multiplied by a scale factor and rounded to the nearest integer to compress the value (and sacrifice information).
163#[derive(Clone, PartialEq, Debug)]
164pub struct FloatDataRange {
165 /// Lowest value of any coordinate of any point in a collection of points.
166 pub low : f64,
167
168
169 /// Highest value of any coordinate of any point in a collection of points.
170 pub high : f64,
171
172 /// Multiplier to apply before rounding to an integer value, sacrificing some precision.
173 /// For example, if you want to encode values such that you preserve precision to the hundredth place, `scale` should be 100.
174 pub scale : f64,
175
176 /// Minimum number of bits required to represent a normalized value without loss of information for the given scale factor.
177 ///
178 /// Example:
179 ///
180 /// - If `low` is -5.02 and `high` is 3.13 then the range is 8.15. If `scale` is 100, the range becomes 815,
181 /// so 10 bits are required to represent all values in that range.
182 pub bits_required : usize
183}
184
185impl FloatDataRange {
186
187 /// Create a `FloatDataRange` without reference to particular data.
188 pub fn new(low : f64, high : f64, scale : f64) -> Self {
189 let scaled_range = ((high - low) * scale).ceil() as u32;
190 FloatDataRange {
191 low,
192 high,
193 scale,
194 bits_required : bits_required(scaled_range)
195 }
196 }
197
198 /// Study all `f64` coordinates in all points to find the minimum and maximum values.
199 pub fn from_f64<I>(points : &[I], scale : f64) -> Self
200 // The "for<'a>" below is "higher-ranked lifetime" notation. Got it from stackoverflow. Confused about what it does but it is needed!
201 where
202 for<'a> &'a I: IntoIterator<Item = &'a f64>,
203 {
204 let mut low = f64::MAX;
205 let mut high = f64::MIN;
206 for point in points.iter() {
207 for coordinate in point.into_iter().map(|c| *c) {
208 if coordinate > high { high = coordinate; }
209 if coordinate < low { low = coordinate; }
210 }
211 }
212 FloatDataRange::new(low, high, scale)
213 }
214
215 /// Range from low to high value.
216 pub fn range(&self) -> f64 { self.high - self.low }
217
218 /// Normalize an `f64` `coordinate` value, shifting it enough so that the minimum value found in any point
219 /// is shifted to zero and the maximum value is shifted to `range`, and using the full number of bits required for the range
220 /// multiplied by the scale.
221 pub fn normalize(&self, coordinate : f64) -> u32 {
222 ((coordinate - self.low) * self.scale).ceil() as u32
223 }
224
225 /// Normalize an `f64` `coordinate` value, shifting it enough so that the minimum value found in any point
226 /// is shifted to zero and the maximum value is shifted to `range`, then optionally compressing the range by bit shifting
227 /// such that no more than the given number of bits are required for the largest value.
228 pub fn compress(&self, coordinate : f64, bits_allocated : usize) -> u32 {
229 let uncompressed = self.normalize(coordinate);
230 if bits_allocated < self.bits_required { uncompressed >> (self.bits_required - bits_allocated) }
231 else { uncompressed }
232 }
233
234}
235
236#[cfg(test)]
237mod tests {
238
239 #[allow(unused_imports)]
240 use spectral::prelude::*;
241 use super::{IntegerDataRange, FloatDataRange};
242
243 #[test]
244 fn from_i32() {
245 let points = test_points_i32();
246 let actual_range = IntegerDataRange::from_i32(&points);
247 let expected_range = IntegerDataRange::new(-100, 100);
248 asserting("Range correct").that(&actual_range).is_equal_to(expected_range);
249 }
250
251 #[test]
252 fn normalize_i32() {
253 let points = test_points_i32();
254 let range = IntegerDataRange::from_i32(&points);
255 let actual_normalized = range.normalize(-16);
256 let expected_normalized = 84;
257 asserting("Normalized value correct").that(&actual_normalized).is_equal_to(expected_normalized);
258 }
259
260 #[test]
261 fn compress_i32() {
262 let points = test_points_i32();
263 let range = IntegerDataRange::from_i32(&points);
264 let actual_compressed = range.compress(-16, 7);
265 let expected_compressed = 42;
266 asserting("Compressed value correct").that(&actual_compressed).is_equal_to(expected_compressed);
267 }
268
269 #[test]
270 fn smallest_power_of_two() {
271 let in_out_pairs = vec![(0_u64,0), (1,0), (2,1), (3,2), (4,2), (5,3), (6,3), (7,3), (8,3), (9,4)];
272 for (n, expected) in in_out_pairs {
273 let actual = super::smallest_power_of_two(n);
274 asserting(&format!("n = {}, actual = {}, expected = {}", n, actual, expected)).that(&actual).is_equal_to(expected);
275 }
276 }
277
278 #[test]
279 fn from_f64() {
280 let points = test_points_f64();
281 let actual_range = FloatDataRange::from_f64(&points, 10.0);
282 let expected_range = FloatDataRange::new(-100.0, 100.19, 10.0);
283 asserting("Range correct").that(&actual_range).is_equal_to(expected_range);
284 }
285
286 #[test]
287 fn normalize_f64() {
288 let points = test_points_f64();
289 let range = FloatDataRange::from_f64(&points, 10.0);
290 let actual_normalized = range.normalize(-16.0);
291 let expected_normalized = 840;
292 asserting("Normalized value correct").that(&actual_normalized).is_equal_to(expected_normalized);
293 }
294
295 /// Compress a value in a range that requires 11 bits down to 7 bits, forcing a division by 16 and loss of precision.
296 #[test]
297 fn compress_f64() {
298 let points = test_points_f64();
299 let range = FloatDataRange::from_f64(&points, 10.0);
300 let actual_compressed = range.compress(-16.0, 7);
301 let expected_compressed = 52;
302 asserting("Compressed value correct").that(&actual_compressed).is_equal_to(expected_compressed);
303 }
304
305 fn test_points_i32() -> Vec<Vec<i32>> {
306 vec![
307 vec![-10, 5, 3],
308 vec![-4, 20, 100],
309 vec![42, -100, 0]
310 ]
311 }
312
313 fn test_points_f64() -> Vec<Vec<f64>> {
314 // Range is 100.9 - -100.0 => 200.9
315 // If scaled by 10, the range is 2009.
316 vec![
317 vec![-10.5, 5.27, 3.66],
318 vec![-4.802, 20.2, 100.19],
319 vec![42.0, -100.0, 0.0]
320 ]
321 }
322
323}