Skip to main content

noxu_bind/tuple/
sort_key.rs

1//! Sort-preserving key encoding trait for database keys.
2//!
3//! Types that implement `SortKey` can be encoded to bytes such that the
4//! lexicographic byte order of the encoded form exactly matches the natural
5//! ordering of the original values. This is the required property for B-tree
6//! keys: byte-wise comparison of serialized keys must agree with `Ord` on the
7//! original values.
8//!
9//! ## Encoding rules
10//!
11//! | Rust type  | Width    | Encoding                                             |
12//! |------------|----------|------------------------------------------------------|
13//! | `bool`     | 1 byte   | `false` → 0x00, `true` → 0x01                       |
14//! | `u8`       | 1 byte   | raw value                                            |
15//! | `i8`       | 1 byte   | value XOR 0x80 (sign-bit flip)                       |
16//! | `u16`      | 2 bytes  | big-endian                                           |
17//! | `i16`      | 2 bytes  | big-endian, sign-bit flipped                         |
18//! | `u32`      | 4 bytes  | big-endian                                           |
19//! | `i32`      | 4 bytes  | big-endian, sign-bit flipped                         |
20//! | `u64`      | 8 bytes  | big-endian                                           |
21//! | `i64`      | 8 bytes  | big-endian, sign-bit flipped                         |
22//! | `f32`      | 4 bytes  | IEEE 754 with sign-conditional bit-flip              |
23//! | `f64`      | 8 bytes  | IEEE 754 with sign-conditional bit-flip              |
24//! | `String`   | variable | UTF-8 bytes, null-escaped, two-byte `[0x00,0x00]` terminator |
25//! | `Vec<u8>`  | variable | raw bytes, null-escaped, two-byte `[0x00,0x00]` terminator   |
26//!
27//! The `f32`/`f64` encoding uses the same IEEE 754 sign-bit manipulation as
28//! `TupleOutput::write_sorted_float`/`write_sorted_double`:
29//! - Negative values: all bits XOR'd (sort before positive)
30//! - Positive values: only the sign bit XOR'd (sort after negative)
31//!
32//! Variable-length types (`String`, `Vec<u8>`) use null-byte escaping so that
33//! composite keys containing multiple variable-length fields remain sortable
34//! and self-delimiting: each embedded `0x00` byte is written as `[0x00, 0x01]`
35//! and the field is terminated with `[0x00, 0x00]`.
36//!
37//! ## Property tests
38//!
39//! Reverse round-trip (decode-then-re-encode is byte-identical) and order-
40//! preservation properties for each `SortKey` impl live in
41//! `crates/noxu-bind/tests/prop_tests.rs` (Wave 11-E).
42
43use crate::error::{BindError, Result};
44use crate::tuple::tuple_input::TupleInput;
45use crate::tuple::tuple_output::TupleOutput;
46
47/// Trait for types whose values can be encoded to and decoded from a
48/// sort-preserving byte representation.
49///
50/// The encoded bytes must satisfy: for any two values `a` and `b` of the same
51/// type, `encode(a) < encode(b)` (lexicographically) if and only if `a < b`.
52///
53/// Implementations must be consistent with `Ord` for the type.
54pub trait SortKey: Sized {
55    /// Writes the sort-preserving encoding of `self` into `output`.
56    fn encode_sort_key(&self, output: &mut TupleOutput);
57
58    /// Reads a value from `input` that was written by `encode_sort_key`.
59    fn decode_sort_key(input: &mut TupleInput) -> Result<Self>;
60}
61
62impl SortKey for bool {
63    fn encode_sort_key(&self, output: &mut TupleOutput) {
64        output.write_bool(*self);
65    }
66    fn decode_sort_key(input: &mut TupleInput) -> Result<Self> {
67        input.read_bool()
68    }
69}
70
71impl SortKey for u8 {
72    fn encode_sort_key(&self, output: &mut TupleOutput) {
73        output.write_u8(*self);
74    }
75    fn decode_sort_key(input: &mut TupleInput) -> Result<Self> {
76        input.read_u8()
77    }
78}
79
80impl SortKey for i8 {
81    fn encode_sort_key(&self, output: &mut TupleOutput) {
82        output.write_i8(*self);
83    }
84    fn decode_sort_key(input: &mut TupleInput) -> Result<Self> {
85        input.read_i8()
86    }
87}
88
89impl SortKey for u16 {
90    fn encode_sort_key(&self, output: &mut TupleOutput) {
91        output.write_u16(*self);
92    }
93    fn decode_sort_key(input: &mut TupleInput) -> Result<Self> {
94        input.read_u16()
95    }
96}
97
98impl SortKey for i16 {
99    fn encode_sort_key(&self, output: &mut TupleOutput) {
100        output.write_i16(*self);
101    }
102    fn decode_sort_key(input: &mut TupleInput) -> Result<Self> {
103        input.read_i16()
104    }
105}
106
107impl SortKey for u32 {
108    fn encode_sort_key(&self, output: &mut TupleOutput) {
109        output.write_u32(*self);
110    }
111    fn decode_sort_key(input: &mut TupleInput) -> Result<Self> {
112        input.read_u32()
113    }
114}
115
116impl SortKey for i32 {
117    fn encode_sort_key(&self, output: &mut TupleOutput) {
118        output.write_i32(*self);
119    }
120    fn decode_sort_key(input: &mut TupleInput) -> Result<Self> {
121        input.read_i32()
122    }
123}
124
125impl SortKey for u64 {
126    fn encode_sort_key(&self, output: &mut TupleOutput) {
127        output.write_u64(*self);
128    }
129    fn decode_sort_key(input: &mut TupleInput) -> Result<Self> {
130        input.read_u64()
131    }
132}
133
134impl SortKey for i64 {
135    fn encode_sort_key(&self, output: &mut TupleOutput) {
136        output.write_i64(*self);
137    }
138    fn decode_sort_key(input: &mut TupleInput) -> Result<Self> {
139        input.read_i64()
140    }
141}
142
143/// `f32` keys use the sort-preserving IEEE 754 encoding: negative values have
144/// all bits flipped; positive values have only the sign bit flipped. This
145/// ensures the full float range sorts correctly as unsigned bytes.
146impl SortKey for f32 {
147    fn encode_sort_key(&self, output: &mut TupleOutput) {
148        output.write_sorted_float(*self);
149    }
150    fn decode_sort_key(input: &mut TupleInput) -> Result<Self> {
151        input.read_sorted_float()
152    }
153}
154
155/// `f64` keys use the sort-preserving IEEE 754 encoding: negative values have
156/// all bits flipped; positive values have only the sign bit flipped.
157impl SortKey for f64 {
158    fn encode_sort_key(&self, output: &mut TupleOutput) {
159        output.write_sorted_double(*self);
160    }
161    fn decode_sort_key(input: &mut TupleInput) -> Result<Self> {
162        input.read_sorted_double()
163    }
164}
165
166/// `String` keys are encoded as null-escaped UTF-8 followed by `[0x00, 0x00]`.
167/// Embedded `0x00` bytes are escaped as `[0x00, 0x01]`. Lexicographic byte
168/// order of encoded strings matches lexicographic string order.
169impl SortKey for String {
170    fn encode_sort_key(&self, output: &mut TupleOutput) {
171        output.write_string(self.as_str());
172    }
173    fn decode_sort_key(input: &mut TupleInput) -> Result<Self> {
174        input.read_string()
175    }
176}
177
178/// `Vec<u8>` keys are encoded as null-escaped raw bytes followed by `[0x00,
179/// 0x00]`. Embedded `0x00` bytes are escaped as `[0x00, 0x01]`.
180/// Lexicographic byte order of encoded `Vec<u8>` values matches
181/// lexicographic byte-slice order.
182impl SortKey for Vec<u8> {
183    fn encode_sort_key(&self, output: &mut TupleOutput) {
184        // Null-escape the bytes, then write the two-byte terminator.
185        for &b in self.iter() {
186            if b == 0x00 {
187                output.write_bytes(&[0x00, 0x01]);
188            } else {
189                output.write_bytes(&[b]);
190            }
191        }
192        output.write_bytes(&[0x00, 0x00]);
193    }
194    fn decode_sort_key(input: &mut TupleInput) -> Result<Self> {
195        let mut decoded: Vec<u8> = Vec::new();
196        loop {
197            let buf = input.get_buffer();
198            let off = input.get_offset();
199            if off >= buf.len() {
200                return Err(BindError::InvalidData(
201                    "no null terminator found for Vec<u8> key".to_string(),
202                ));
203            }
204            let b = buf[off];
205            input.skip(1)?;
206            if b == 0x00 {
207                let buf2 = input.get_buffer();
208                let off2 = input.get_offset();
209                if off2 >= buf2.len() {
210                    return Err(BindError::InvalidData(
211                        "truncated null escape in Vec<u8> key".to_string(),
212                    ));
213                }
214                let next = buf2[off2];
215                input.skip(1)?;
216                if next == 0x00 {
217                    break;
218                } else if next == 0x01 {
219                    decoded.push(0x00);
220                } else {
221                    return Err(BindError::InvalidData(format!(
222                        "invalid null escape byte 0x{:02x} in Vec<u8> key",
223                        next
224                    )));
225                }
226            } else {
227                decoded.push(b);
228            }
229        }
230        Ok(decoded)
231    }
232}
233
234#[cfg(test)]
235mod tests {
236    use super::*;
237
238    fn encode<T: SortKey>(val: &T) -> Vec<u8> {
239        let mut out = TupleOutput::new();
240        val.encode_sort_key(&mut out);
241        out.into_vec()
242    }
243
244    fn decode<T: SortKey>(bytes: &[u8]) -> T {
245        let mut inp = TupleInput::new(bytes);
246        T::decode_sort_key(&mut inp).unwrap()
247    }
248
249    fn round_trip<T: SortKey + PartialEq + std::fmt::Debug>(val: T) -> T {
250        let encoded = encode(&val);
251        decode(&encoded)
252    }
253
254    // --- round-trip tests ---
255
256    #[test]
257    fn test_bool_round_trip() {
258        assert!(!round_trip(false));
259        assert!(round_trip(true));
260    }
261
262    #[test]
263    fn test_u8_round_trip() {
264        assert_eq!(round_trip(0u8), 0);
265        assert_eq!(round_trip(255u8), 255);
266    }
267    #[test]
268    fn test_i8_round_trip() {
269        assert_eq!(round_trip(i8::MIN), i8::MIN);
270        assert_eq!(round_trip(0i8), 0);
271        assert_eq!(round_trip(i8::MAX), i8::MAX);
272    }
273    #[test]
274    fn test_u16_round_trip() {
275        assert_eq!(round_trip(0u16), 0);
276        assert_eq!(round_trip(u16::MAX), u16::MAX);
277    }
278    #[test]
279    fn test_i16_round_trip() {
280        assert_eq!(round_trip(i16::MIN), i16::MIN);
281        assert_eq!(round_trip(0i16), 0);
282        assert_eq!(round_trip(i16::MAX), i16::MAX);
283    }
284    #[test]
285    fn test_u32_round_trip() {
286        assert_eq!(round_trip(0u32), 0);
287        assert_eq!(round_trip(u32::MAX), u32::MAX);
288    }
289    #[test]
290    fn test_i32_round_trip() {
291        assert_eq!(round_trip(i32::MIN), i32::MIN);
292        assert_eq!(round_trip(0i32), 0);
293        assert_eq!(round_trip(i32::MAX), i32::MAX);
294    }
295    #[test]
296    fn test_u64_round_trip() {
297        assert_eq!(round_trip(0u64), 0);
298        assert_eq!(round_trip(u64::MAX), u64::MAX);
299    }
300    #[test]
301    fn test_i64_round_trip() {
302        assert_eq!(round_trip(i64::MIN), i64::MIN);
303        assert_eq!(round_trip(0i64), 0);
304        assert_eq!(round_trip(i64::MAX), i64::MAX);
305    }
306    #[test]
307    fn test_f32_round_trip() {
308        for &v in &[0.0f32, 1.5, -1.5, f32::MAX, f32::MIN] {
309            assert_eq!(round_trip(v).to_bits(), v.to_bits());
310        }
311        assert!(round_trip(f32::NAN).is_nan());
312    }
313    #[test]
314    fn test_f64_round_trip() {
315        for &v in &[0.0f64, 1.5, -1.5, f64::MAX, f64::MIN] {
316            assert_eq!(round_trip(v).to_bits(), v.to_bits());
317        }
318        assert!(round_trip(f64::NAN).is_nan());
319    }
320    #[test]
321    fn test_string_round_trip() {
322        assert_eq!(round_trip("".to_string()), "");
323        assert_eq!(round_trip("hello".to_string()), "hello");
324        let with_null = "a\x00b".to_string();
325        assert_eq!(round_trip(with_null.clone()), with_null);
326    }
327    #[test]
328    fn test_vec_u8_round_trip() {
329        assert_eq!(round_trip(vec![]), Vec::<u8>::new());
330        assert_eq!(round_trip(vec![1u8, 2, 3]), vec![1, 2, 3]);
331        assert_eq!(
332            round_trip(vec![0x00u8, 0x01, 0x00]),
333            vec![0x00, 0x01, 0x00]
334        );
335    }
336
337    // --- sort-order tests ---
338
339    fn assert_order<T: SortKey>(lesser: T, greater: T) {
340        assert!(
341            encode(&lesser) < encode(&greater),
342            "expected encode({:?}) < encode({:?})",
343            encode(&lesser),
344            encode(&greater)
345        );
346    }
347
348    #[test]
349    fn test_sort_order_u64() {
350        for (a, b) in
351            [(0u64, 1), (1, 10), (100, 1000), (u64::MAX - 1, u64::MAX)]
352        {
353            assert_order(a, b);
354        }
355    }
356    #[test]
357    fn test_sort_order_i64() {
358        let vals = [i64::MIN, -1000i64, -1, 0, 1, 1000, i64::MAX];
359        for w in vals.windows(2) {
360            assert_order(w[0], w[1]);
361        }
362    }
363    #[test]
364    fn test_sort_order_u32() {
365        for (a, b) in [(0u32, 1), (1, 100), (u32::MAX - 1, u32::MAX)] {
366            assert_order(a, b);
367        }
368    }
369    #[test]
370    fn test_sort_order_i32() {
371        let vals = [i32::MIN, -1i32, 0, 1, i32::MAX];
372        for w in vals.windows(2) {
373            assert_order(w[0], w[1]);
374        }
375    }
376    #[test]
377    fn test_sort_order_string() {
378        assert_order("a".to_string(), "b".to_string());
379        assert_order("a".to_string(), "aa".to_string());
380        assert_order("abc".to_string(), "abd".to_string());
381    }
382    #[test]
383    fn test_sort_order_vec_u8() {
384        assert_order(vec![0x01u8], vec![0x02u8]);
385        assert_order(vec![0x01u8], vec![0x01u8, 0x00]);
386        assert_order(vec![0xFEu8], vec![0xFFu8]);
387    }
388    #[test]
389    fn test_sort_order_f64() {
390        let vals = [f64::NEG_INFINITY, -1.0f64, 0.0, 1.0, f64::INFINITY];
391        for w in vals.windows(2) {
392            assert_order(w[0], w[1]);
393        }
394    }
395    #[test]
396    fn test_sort_order_i8() {
397        let vals = [i8::MIN, -1i8, 0, 1, i8::MAX];
398        for w in vals.windows(2) {
399            assert_order(w[0], w[1]);
400        }
401    }
402    #[test]
403    fn test_sort_order_bool() {
404        assert_order(false, true);
405    }
406}