1use bytes::Bytes;
19use kimberlite_store::Key;
20use kimberlite_types::Timestamp;
21
22use crate::value::Value;
23
/// Encodes an `i8` as one byte whose unsigned order matches the signed order
/// of the input.
///
/// Flipping the sign bit maps `i8::MIN..=i8::MAX` monotonically onto
/// `0x00..=0xFF`, so encoded values can be compared bytewise.
pub fn encode_tinyint(value: i8) -> [u8; 1] {
    let [raw] = value.to_be_bytes();
    [raw ^ 0x80]
}
30
/// Reverses [`encode_tinyint`]: flips the sign bit back and reinterprets the
/// byte as an `i8`.
#[allow(dead_code)]
pub fn decode_tinyint(bytes: [u8; 1]) -> i8 {
    let [encoded] = bytes;
    i8::from_be_bytes([encoded ^ 0x80])
}
37
/// Encodes an `i16` as two big-endian bytes whose bytewise order matches the
/// signed order of the input.
///
/// The sign bit lives in the top bit of the first big-endian byte; flipping
/// it places negative values before non-negative ones.
pub fn encode_smallint(value: i16) -> [u8; 2] {
    let mut encoded = value.to_be_bytes();
    encoded[0] ^= 0x80;
    encoded
}
44
/// Reverses [`encode_smallint`]: flips the sign bit back and reads the bytes
/// as a big-endian `i16`.
#[allow(dead_code)]
pub fn decode_smallint(bytes: [u8; 2]) -> i16 {
    let mut raw = bytes;
    raw[0] ^= 0x80;
    i16::from_be_bytes(raw)
}
51
/// Encodes an `i32` as four big-endian bytes whose bytewise order matches the
/// signed order of the input.
///
/// Flipping the sign bit (top bit of the first byte) places negative values
/// before non-negative ones.
pub fn encode_integer(value: i32) -> [u8; 4] {
    let mut encoded = value.to_be_bytes();
    encoded[0] ^= 0x80;
    encoded
}
58
/// Reverses [`encode_integer`]: flips the sign bit back and reads the bytes
/// as a big-endian `i32`.
#[allow(dead_code)]
pub fn decode_integer(bytes: [u8; 4]) -> i32 {
    let mut raw = bytes;
    raw[0] ^= 0x80;
    i32::from_be_bytes(raw)
}
65
/// Encodes an `i64` as eight big-endian bytes whose bytewise order matches
/// the signed order of the input.
///
/// Flipping the sign bit (top bit of the first byte) places negative values
/// before non-negative ones.
pub fn encode_bigint(value: i64) -> [u8; 8] {
    let mut encoded = value.to_be_bytes();
    encoded[0] ^= 0x80;
    encoded
}
85
/// Reverses [`encode_bigint`]: flips the sign bit back and reads the bytes as
/// a big-endian `i64`.
#[allow(dead_code)]
pub fn decode_bigint(bytes: [u8; 8]) -> i64 {
    let mut raw = bytes;
    raw[0] ^= 0x80;
    i64::from_be_bytes(raw)
}
92
93pub fn encode_timestamp(ts: Timestamp) -> [u8; 8] {
98 ts.as_nanos().to_be_bytes()
99}
100
101#[allow(dead_code)]
103pub fn decode_timestamp(bytes: [u8; 8]) -> Timestamp {
104 Timestamp::from_nanos(u64::from_be_bytes(bytes))
105}
106
/// Encodes an `f64` as eight big-endian bytes suitable for bytewise
/// comparison.
///
/// The IEEE-754 bit pattern is transformed as follows:
/// - sign bit set: every bit is inverted, so larger magnitudes sort earlier;
/// - sign bit clear: only the sign bit is set, so these values sort after all
///   sign-negative ones.
pub fn encode_real(value: f64) -> [u8; 8] {
    const SIGN_BIT: u64 = 1 << 63;

    let bits = value.to_bits();
    // XOR with all ones is bitwise NOT; XOR with SIGN_BIT flips just the sign.
    let flip_mask = if bits & SIGN_BIT != 0 {
        u64::MAX
    } else {
        SIGN_BIT
    };

    (bits ^ flip_mask).to_be_bytes()
}
123
/// Reverses [`encode_real`].
///
/// An encoded key with its top bit clear came from a sign-negative value and
/// is fully inverted; otherwise only the top bit is flipped back.
#[allow(dead_code)]
pub fn decode_real(bytes: [u8; 8]) -> f64 {
    const SIGN_BIT: u64 = 1 << 63;

    let key = u64::from_be_bytes(bytes);
    let flip_mask = if key & SIGN_BIT != 0 {
        SIGN_BIT
    } else {
        u64::MAX
    };

    f64::from_bits(key ^ flip_mask)
}
138
/// Encodes a decimal as 17 bytes: the sign-flipped big-endian `i128` value in
/// bytes `0..16`, followed by `scale` in byte 16.
///
/// The value bytes compare bytewise in signed order; `scale` is stored
/// verbatim after them.
pub fn encode_decimal(value: i128, scale: u8) -> [u8; 17] {
    let mut out = [0u8; 17];
    let (unscaled, tail) = out.split_at_mut(16);

    unscaled.copy_from_slice(&value.to_be_bytes());
    // The sign bit is the top bit of the first big-endian byte.
    unscaled[0] ^= 0x80;
    tail[0] = scale;

    out
}
150
/// Reverses [`encode_decimal`], returning `(value, scale)`.
///
/// Bytes `0..16` hold the sign-flipped big-endian `i128`; byte 16 holds the
/// scale.
#[allow(dead_code)]
pub fn decode_decimal(bytes: [u8; 17]) -> (i128, u8) {
    let [unscaled @ .., scale] = bytes;

    let mut raw: [u8; 16] = unscaled;
    raw[0] ^= 0x80;

    (i128::from_be_bytes(raw), scale)
}
161
162#[allow(clippy::cast_sign_loss)]
164pub fn encode_date(value: i32) -> [u8; 4] {
165 encode_integer(value) }
167
168#[allow(dead_code)]
170pub fn decode_date(bytes: [u8; 4]) -> i32 {
171 decode_integer(bytes)
172}
173
/// Encodes a time value as the big-endian bytes of the `i64`.
///
/// Unlike the other signed encoders, no sign-bit flip is applied, so bytewise
/// order matches numeric order only for non-negative inputs: a negative
/// value has its top bit set and would sort after every non-negative value.
pub fn encode_time(value: i64) -> [u8; 8] {
    i64::to_be_bytes(value)
}
180
/// Reverses [`encode_time`]: reads eight big-endian bytes as an `i64`.
#[allow(dead_code)]
pub fn decode_time(bytes: [u8; 8]) -> i64 {
    let nanos = i64::from_be_bytes(bytes);
    nanos
}
186
/// Encodes a UUID for use in a key.
///
/// The 16 bytes are returned unchanged; the function exists so UUIDs follow
/// the same encode/decode call pattern as the other value types.
pub fn encode_uuid(value: [u8; 16]) -> [u8; 16] {
    let encoded: [u8; 16] = value;
    encoded
}
193
/// Reverses [`encode_uuid`]; the 16 bytes are returned unchanged.
#[allow(dead_code)]
pub fn decode_uuid(bytes: [u8; 16]) -> [u8; 16] {
    let decoded: [u8; 16] = bytes;
    decoded
}
199
/// Encodes a boolean as one byte: `0x00` for `false`, `0x01` for `true`.
pub fn encode_boolean(value: bool) -> [u8; 1] {
    match value {
        false => [0x00],
        true => [0x01],
    }
}
204
/// Decodes a boolean byte: `0` is `false`, any other value is `true`.
#[allow(dead_code)]
pub fn decode_boolean(byte: u8) -> bool {
    match byte {
        0 => false,
        _ => true,
    }
}
210
211pub fn encode_key(values: &[Value]) -> Key {
232 let mut buf = Vec::with_capacity(64);
233
234 for value in values {
235 match value {
236 Value::Null => {
237 buf.push(0x00); }
239 Value::BigInt(v) => {
240 buf.push(0x01); buf.extend_from_slice(&encode_bigint(*v));
242 }
243 Value::Text(s) => {
244 buf.push(0x02); for &byte in s.as_bytes() {
246 if byte == 0x00 {
247 buf.push(0x00);
248 buf.push(0xFF); } else {
250 buf.push(byte);
251 }
252 }
253 buf.push(0x00); }
255 Value::Boolean(b) => {
256 buf.push(0x03); buf.extend_from_slice(&encode_boolean(*b));
258 }
259 Value::Timestamp(ts) => {
260 buf.push(0x04); buf.extend_from_slice(&encode_timestamp(*ts));
262 }
263 Value::Bytes(b) => {
264 buf.push(0x05); for &byte in b {
266 if byte == 0x00 {
267 buf.push(0x00);
268 buf.push(0xFF); } else {
270 buf.push(byte);
271 }
272 }
273 buf.push(0x00); }
275 Value::Integer(v) => {
276 buf.push(0x06); buf.extend_from_slice(&encode_integer(*v));
278 }
279 Value::SmallInt(v) => {
280 buf.push(0x07); buf.extend_from_slice(&encode_smallint(*v));
282 }
283 Value::TinyInt(v) => {
284 buf.push(0x08); buf.extend_from_slice(&encode_tinyint(*v));
286 }
287 Value::Real(v) => {
288 buf.push(0x09); buf.extend_from_slice(&encode_real(*v));
290 }
291 Value::Decimal(v, scale) => {
292 buf.push(0x0A); buf.extend_from_slice(&encode_decimal(*v, *scale));
294 }
295 Value::Uuid(u) => {
296 buf.push(0x0B); buf.extend_from_slice(&encode_uuid(*u));
298 }
299 Value::Json(_) => {
300 panic!(
301 "JSON values cannot be used in primary keys or indexes - they are not orderable"
302 )
303 }
304 Value::Date(d) => {
305 buf.push(0x0D); buf.extend_from_slice(&encode_date(*d));
307 }
308 Value::Time(t) => {
309 buf.push(0x0E); buf.extend_from_slice(&encode_time(*t));
311 }
312 Value::Placeholder(idx) => {
313 panic!("Cannot encode unbound placeholder ${idx} - bind parameters first")
314 }
315 }
316 }
317
318 Key::from(buf)
319}
320
321#[allow(dead_code)]
327#[inline]
329fn decode_bigint_value(bytes: &[u8], pos: &mut usize) -> Value {
330 debug_assert!(
331 *pos + 8 <= bytes.len(),
332 "insufficient bytes for BigInt at position {pos}"
333 );
334 let arr: [u8; 8] = bytes[*pos..*pos + 8]
335 .try_into()
336 .expect("BigInt decode failed");
337 *pos += 8;
338 Value::BigInt(decode_bigint(arr))
339}
340
341#[inline]
343fn decode_text_value(bytes: &[u8], pos: &mut usize) -> Value {
344 let mut result = Vec::new();
345
346 while *pos < bytes.len() {
347 debug_assert!(*pos <= bytes.len(), "position out of bounds");
348 let byte = bytes[*pos];
349 *pos += 1;
350
351 if byte == 0x00 {
352 if *pos < bytes.len() && bytes[*pos] == 0xFF {
353 result.push(0x00); *pos += 1;
355 } else {
356 break; }
358 } else {
359 result.push(byte);
360 }
361 }
362
363 let s = std::str::from_utf8(&result).expect("Text decode failed: invalid UTF-8");
364 debug_assert!(
365 std::str::from_utf8(&result).is_ok(),
366 "decoded text must be valid UTF-8"
367 );
368 Value::Text(s.to_string())
369}
370
371#[inline]
373fn decode_boolean_value(bytes: &[u8], pos: &mut usize) -> Value {
374 debug_assert!(
375 *pos < bytes.len(),
376 "insufficient bytes for Boolean at position {pos}"
377 );
378 let b = decode_boolean(bytes[*pos]);
379 *pos += 1;
380 Value::Boolean(b)
381}
382
383#[inline]
385fn decode_timestamp_value(bytes: &[u8], pos: &mut usize) -> Value {
386 debug_assert!(
387 *pos + 8 <= bytes.len(),
388 "insufficient bytes for Timestamp at position {pos}"
389 );
390 let arr: [u8; 8] = bytes[*pos..*pos + 8]
391 .try_into()
392 .expect("Timestamp decode failed");
393 *pos += 8;
394 Value::Timestamp(decode_timestamp(arr))
395}
396
397#[inline]
399fn decode_bytes_value(bytes: &[u8], pos: &mut usize) -> Value {
400 let mut result = Vec::new();
401
402 while *pos < bytes.len() {
403 debug_assert!(*pos <= bytes.len(), "position out of bounds");
404 let byte = bytes[*pos];
405 *pos += 1;
406
407 if byte == 0x00 {
408 if *pos < bytes.len() && bytes[*pos] == 0xFF {
409 result.push(0x00); *pos += 1;
411 } else {
412 break; }
414 } else {
415 result.push(byte);
416 }
417 }
418
419 Value::Bytes(Bytes::from(result))
420}
421
422#[inline]
424fn decode_integer_value(bytes: &[u8], pos: &mut usize) -> Value {
425 debug_assert!(
426 *pos + 4 <= bytes.len(),
427 "insufficient bytes for Integer at position {pos}"
428 );
429 let arr: [u8; 4] = bytes[*pos..*pos + 4]
430 .try_into()
431 .expect("Integer decode failed");
432 *pos += 4;
433 Value::Integer(decode_integer(arr))
434}
435
436#[inline]
438fn decode_smallint_value(bytes: &[u8], pos: &mut usize) -> Value {
439 debug_assert!(
440 *pos + 2 <= bytes.len(),
441 "insufficient bytes for SmallInt at position {pos}"
442 );
443 let arr: [u8; 2] = bytes[*pos..*pos + 2]
444 .try_into()
445 .expect("SmallInt decode failed");
446 *pos += 2;
447 Value::SmallInt(decode_smallint(arr))
448}
449
450#[inline]
452fn decode_tinyint_value(bytes: &[u8], pos: &mut usize) -> Value {
453 debug_assert!(
454 *pos < bytes.len(),
455 "insufficient bytes for TinyInt at position {pos}"
456 );
457 let arr: [u8; 1] = [bytes[*pos]];
458 *pos += 1;
459 Value::TinyInt(decode_tinyint(arr))
460}
461
462#[inline]
464fn decode_real_value(bytes: &[u8], pos: &mut usize) -> Value {
465 debug_assert!(
466 *pos + 8 <= bytes.len(),
467 "insufficient bytes for Real at position {pos}"
468 );
469 let arr: [u8; 8] = bytes[*pos..*pos + 8]
470 .try_into()
471 .expect("Real decode failed");
472 *pos += 8;
473 Value::Real(decode_real(arr))
474}
475
476#[inline]
478fn decode_decimal_value(bytes: &[u8], pos: &mut usize) -> Value {
479 debug_assert!(
480 *pos + 17 <= bytes.len(),
481 "insufficient bytes for Decimal at position {pos}"
482 );
483 let arr: [u8; 17] = bytes[*pos..*pos + 17]
484 .try_into()
485 .expect("Decimal decode failed");
486 *pos += 17;
487 let (val, scale) = decode_decimal(arr);
488 Value::Decimal(val, scale)
489}
490
491#[inline]
493fn decode_uuid_value(bytes: &[u8], pos: &mut usize) -> Value {
494 debug_assert!(
495 *pos + 16 <= bytes.len(),
496 "insufficient bytes for Uuid at position {pos}"
497 );
498 let arr: [u8; 16] = bytes[*pos..*pos + 16]
499 .try_into()
500 .expect("Uuid decode failed");
501 *pos += 16;
502 Value::Uuid(decode_uuid(arr))
503}
504
505#[inline]
507fn decode_date_value(bytes: &[u8], pos: &mut usize) -> Value {
508 debug_assert!(
509 *pos + 4 <= bytes.len(),
510 "insufficient bytes for Date at position {pos}"
511 );
512 let arr: [u8; 4] = bytes[*pos..*pos + 4]
513 .try_into()
514 .expect("Date decode failed");
515 *pos += 4;
516 Value::Date(decode_date(arr))
517}
518
519#[inline]
521fn decode_time_value(bytes: &[u8], pos: &mut usize) -> Value {
522 debug_assert!(
523 *pos + 8 <= bytes.len(),
524 "insufficient bytes for Time at position {pos}"
525 );
526 let arr: [u8; 8] = bytes[*pos..*pos + 8]
527 .try_into()
528 .expect("Time decode failed");
529 *pos += 8;
530 Value::Time(decode_time(arr))
531}
532
533pub fn decode_key(key: &Key) -> Vec<Value> {
534 let bytes = key.as_bytes();
535 let mut values = Vec::new();
536 let mut pos = 0;
537
538 while pos < bytes.len() {
539 let tag = bytes[pos];
540 pos += 1;
541
542 let value = match tag {
543 0x00 => Value::Null,
544 0x01 => decode_bigint_value(bytes, &mut pos),
545 0x02 => decode_text_value(bytes, &mut pos),
546 0x03 => decode_boolean_value(bytes, &mut pos),
547 0x04 => decode_timestamp_value(bytes, &mut pos),
548 0x05 => decode_bytes_value(bytes, &mut pos),
549 0x06 => decode_integer_value(bytes, &mut pos),
550 0x07 => decode_smallint_value(bytes, &mut pos),
551 0x08 => decode_tinyint_value(bytes, &mut pos),
552 0x09 => decode_real_value(bytes, &mut pos),
553 0x0A => decode_decimal_value(bytes, &mut pos),
554 0x0B => decode_uuid_value(bytes, &mut pos),
555 0x0C => panic!("JSON values cannot be decoded from keys - they are not indexable"),
556 0x0D => decode_date_value(bytes, &mut pos),
557 0x0E => decode_time_value(bytes, &mut pos),
558 _ => panic!("unknown type tag {tag:#04x} at position {}", pos - 1),
559 };
560
561 values.push(value);
562 }
563
564 values
565}
566
567#[allow(dead_code)]
571pub fn min_key_for_type(count: usize) -> Key {
572 let values: Vec<Value> = (0..count).map(|_| Value::Null).collect();
573 encode_key(&values)
574}
575
576pub fn successor_key(key: &Key) -> Key {
580 let bytes = key.as_bytes();
581 let mut result = bytes.to_vec();
582
583 for i in (0..result.len()).rev() {
585 if result[i] < 0xFF {
586 result[i] += 1;
587 return Key::from(result);
588 }
589 result[i] = 0x00;
590 }
591
592 result.push(0x00);
594 Key::from(result)
595}
596
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a single-column key holding the given text.
    fn text_key(s: &str) -> Key {
        encode_key(&[Value::Text(s.to_string())])
    }

    /// Builds a single-column key holding the given byte string.
    fn bytes_key(data: &'static [u8]) -> Key {
        encode_key(&[Value::Bytes(Bytes::from(data))])
    }

    /// Asserts that every adjacent pair of `cases` compares the same way as
    /// the corresponding pair of encoded `keys`.
    fn assert_same_adjacent_order<T>(cases: &[&T], keys: &[Key])
    where
        T: Ord + std::fmt::Debug + ?Sized,
    {
        for (case_pair, key_pair) in cases.windows(2).zip(keys.windows(2)) {
            assert_eq!(
                case_pair[0].cmp(case_pair[1]),
                key_pair[0].cmp(&key_pair[1]),
                "Ordering not preserved between {:?} and {:?}",
                case_pair[0],
                case_pair[1]
            );
        }
    }

    #[test]
    fn test_bigint_encoding_preserves_order() {
        // Listed in ascending numeric order.
        let values = [
            i64::MIN,
            i64::MIN + 1,
            -1000,
            -1,
            0,
            1,
            1000,
            i64::MAX - 1,
            i64::MAX,
        ];

        let encoded: Vec<_> = values.iter().map(|&v| encode_bigint(v)).collect();
        let sorted = {
            let mut copy = encoded.clone();
            copy.sort_unstable();
            copy
        };

        assert_eq!(encoded, sorted, "BigInt encoding should preserve ordering");

        // Every value must survive an encode/decode round trip.
        for &v in &values {
            assert_eq!(decode_bigint(encode_bigint(v)), v);
        }
    }

    #[test]
    fn test_timestamp_encoding_preserves_order() {
        // Listed in ascending numeric order.
        let values = [0u64, 1, 1000, u64::MAX / 2, u64::MAX];

        let encoded: Vec<_> = values
            .iter()
            .map(|&v| encode_timestamp(Timestamp::from_nanos(v)))
            .collect();
        let sorted = {
            let mut copy = encoded.clone();
            copy.sort_unstable();
            copy
        };

        assert_eq!(
            encoded, sorted,
            "Timestamp encoding should preserve ordering"
        );

        // Every value must survive an encode/decode round trip.
        for &v in &values {
            let ts = Timestamp::from_nanos(v);
            assert_eq!(decode_timestamp(encode_timestamp(ts)), ts);
        }
    }

    #[test]
    fn test_composite_key_round_trip() {
        let values = vec![
            Value::BigInt(42),
            Value::Text("hello".to_string()),
            Value::Boolean(true),
            Value::Timestamp(Timestamp::from_nanos(12345)),
            Value::Bytes(Bytes::from_static(b"data")),
        ];

        let decoded = decode_key(&encode_key(&values));

        assert_eq!(values, decoded);
    }

    #[test]
    fn test_composite_key_ordering() {
        let pair_key = |a: i64, b: i64| encode_key(&[Value::BigInt(a), Value::BigInt(b)]);

        let key1 = pair_key(1, 1);
        let key2 = pair_key(1, 2);
        let key3 = pair_key(2, 1);

        assert!(key1 < key2, "key1 should be less than key2");
        assert!(key2 < key3, "key2 should be less than key3");
    }

    #[test]
    fn test_successor_key() {
        let key = encode_key(&[Value::BigInt(42)]);
        let succ = successor_key(&key);

        assert!(key < succ, "successor should be greater");
    }

    #[test]
    fn test_null_handling() {
        let key = encode_key(&[Value::Null]);
        assert_eq!(decode_key(&key), vec![Value::Null]);
    }

    #[test]
    fn test_text_ordering_original_bug_case() {
        // A longer string starting with 'a' must still sort before "b".
        let short = text_key("b");
        let long = text_key("aaaaaaa");
        assert!(
            long < short,
            "aaaaaaa should be < b in lexicographic ordering"
        );
    }

    #[test]
    fn test_text_with_embedded_nulls() {
        let cases = ["abc", "a\0bc", "a\0\0bc", "\0abc", "abc\0"];

        // Round trip.
        for &s in &cases {
            assert_eq!(
                decode_key(&text_key(s)),
                vec![Value::Text(s.to_string())],
                "Failed to round-trip: {s:?}"
            );
        }

        // Ordering.
        let keys: Vec<_> = cases.iter().map(|&s| text_key(s)).collect();
        assert_same_adjacent_order(&cases, &keys);
    }

    #[test]
    fn test_bytes_with_embedded_nulls() {
        let cases: [&'static [u8]; 5] = [b"abc", b"a\0bc", b"a\0\0bc", b"\0abc", b"abc\0"];

        // Round trip.
        for &data in &cases {
            assert_eq!(
                decode_key(&bytes_key(data)),
                vec![Value::Bytes(Bytes::from(data))],
                "Failed to round-trip: {data:?}"
            );
        }

        // Ordering.
        let keys: Vec<_> = cases.iter().map(|&data| bytes_key(data)).collect();
        assert_same_adjacent_order(&cases, &keys);
    }

    #[test]
    fn test_empty_text_and_bytes() {
        let text = encode_key(&[Value::Text(String::new())]);
        let bytes = encode_key(&[Value::Bytes(Bytes::new())]);

        assert_eq!(decode_key(&text), vec![Value::Text(String::new())]);
        assert_eq!(decode_key(&bytes), vec![Value::Bytes(Bytes::new())]);
    }

    #[test]
    fn test_composite_key_text_ordering() {
        let row_key = |id: i64, name: &str| {
            encode_key(&[Value::BigInt(id), Value::Text(name.to_string())])
        };

        let k1 = row_key(1, "aaa");
        let k2 = row_key(1, "z");
        let k3 = row_key(2, "a");

        assert!(k1 < k2, "aaa should be < z");
        assert!(k2 < k3, "1,z should be < 2,a");
    }

    #[test]
    fn test_text_ordering_various_lengths() {
        let cases = ["a", "aa", "aaa", "b", "ba", "baa"];

        let keys: Vec<_> = cases.iter().map(|&s| text_key(s)).collect();

        assert_same_adjacent_order(&cases, &keys);
    }

    #[test]
    fn test_bytes_ordering_with_high_byte_values() {
        let cases: [&'static [u8]; 8] = [
            &[0x00],
            &[0x00, 0x00],
            &[0x01],
            &[0x7F],
            &[0xFF],
            &[0xFF, 0x00],
            &[0xFF, 0xFE],
            &[0xFF, 0xFF],
        ];

        let keys: Vec<_> = cases.iter().map(|&data| bytes_key(data)).collect();

        assert_same_adjacent_order(&cases, &keys);
    }
}