Skip to main content

reifydb_core/util/encoding/keycode/
deserializer.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright (c) 2025 ReifyDB
3
4use num_bigint::{BigInt, Sign};
5use reifydb_type::{
6	Result,
7	error::{Error, TypeError},
8	value::{
9		Value,
10		blob::Blob,
11		date::Date,
12		datetime::DateTime,
13		decimal::Decimal,
14		duration::Duration,
15		identity::IdentityId,
16		int::Int,
17		ordered_f32::OrderedF32,
18		ordered_f64::OrderedF64,
19		row_number::RowNumber,
20		time::Time,
21		uint::Uint,
22		uuid::{Uuid4, Uuid7},
23	},
24};
25use uuid::Uuid;
26
27use super::{catalog, deserialize};
28use crate::interface::catalog::{id::IndexId, primitive::PrimitiveId};
29
/// Cursor that decodes keycode-encoded values from a borrowed byte slice.
///
/// The `read_*` methods decode a value starting at the current position and
/// move the position forward past the bytes they consume.
#[derive(Debug, Clone)]
pub struct KeyDeserializer<'a> {
	/// The complete encoded key being decoded.
	buffer: &'a [u8],
	/// Offset into `buffer` of the next byte to be read.
	position: usize,
}
34
35impl<'a> KeyDeserializer<'a> {
36	pub fn from_bytes(buffer: &'a [u8]) -> Self {
37		Self {
38			buffer,
39			position: 0,
40		}
41	}
42
43	pub fn remaining(&self) -> usize {
44		self.buffer.len().saturating_sub(self.position)
45	}
46
47	pub fn is_empty(&self) -> bool {
48		self.remaining() == 0
49	}
50
51	pub fn position(&self) -> usize {
52		self.position
53	}
54
55	fn read_exact(&mut self, count: usize) -> Result<&'a [u8]> {
56		if self.remaining() < count {
57			return Err(Error::from(TypeError::SerdeKeycode {
58				message: format!(
59					"unexpected end of key at position {}: need {} bytes, have {}",
60					self.position,
61					count,
62					self.remaining()
63				),
64			}));
65		}
66		let start = self.position;
67		self.position += count;
68		Ok(&self.buffer[start..self.position])
69	}
70
71	pub fn read_bool(&mut self) -> Result<bool> {
72		let bytes = self.read_exact(1)?;
73		Ok(deserialize::<bool>(bytes)?)
74	}
75
76	pub fn read_f32(&mut self) -> Result<f32> {
77		let bytes = self.read_exact(4)?;
78		Ok(deserialize::<f32>(bytes)?)
79	}
80
81	pub fn read_f64(&mut self) -> Result<f64> {
82		let bytes = self.read_exact(8)?;
83		Ok(deserialize::<f64>(bytes)?)
84	}
85
86	pub fn read_i8(&mut self) -> Result<i8> {
87		let bytes = self.read_exact(1)?;
88		Ok(deserialize::<i8>(bytes)?)
89	}
90
91	pub fn read_i16(&mut self) -> Result<i16> {
92		let bytes = self.read_exact(2)?;
93		Ok(deserialize::<i16>(bytes)?)
94	}
95
96	pub fn read_i32(&mut self) -> Result<i32> {
97		let bytes = self.read_exact(4)?;
98		Ok(deserialize::<i32>(bytes)?)
99	}
100
101	pub fn read_i64(&mut self) -> Result<i64> {
102		let bytes = self.read_exact(8)?;
103		Ok(deserialize::<i64>(bytes)?)
104	}
105
106	pub fn read_i128(&mut self) -> Result<i128> {
107		let bytes = self.read_exact(16)?;
108		Ok(deserialize::<i128>(bytes)?)
109	}
110
111	pub fn read_u8(&mut self) -> Result<u8> {
112		let bytes = self.read_exact(1)?;
113		Ok(deserialize::<u8>(bytes)?)
114	}
115
116	pub fn read_u16(&mut self) -> Result<u16> {
117		let bytes = self.read_exact(2)?;
118		Ok(deserialize::<u16>(bytes)?)
119	}
120
121	pub fn read_u32(&mut self) -> Result<u32> {
122		let bytes = self.read_exact(4)?;
123		Ok(deserialize::<u32>(bytes)?)
124	}
125
126	pub fn read_u64(&mut self) -> Result<u64> {
127		let bytes = self.read_exact(8)?;
128		Ok(deserialize::<u64>(bytes)?)
129	}
130
131	pub fn read_u128(&mut self) -> Result<u128> {
132		let bytes = self.read_exact(16)?;
133		Ok(deserialize::<u128>(bytes)?)
134	}
135
136	pub fn read_bytes(&mut self) -> Result<Vec<u8>> {
137		let mut result = Vec::new();
138		loop {
139			if self.remaining() < 1 {
140				return Err(Error::from(TypeError::SerdeKeycode {
141					message: format!(
142						"unexpected end of key at position {}: bytes not terminated",
143						self.position
144					),
145				}));
146			}
147			let byte = self.buffer[self.position];
148			self.position += 1;
149
150			if byte == 0xff {
151				if self.remaining() < 1 {
152					return Err(Error::from(TypeError::SerdeKeycode {
153						message: format!(
154							"unexpected end of key at position {}: incomplete escape sequence",
155							self.position
156						),
157					}));
158				}
159				let next_byte = self.buffer[self.position];
160				self.position += 1;
161
162				if next_byte == 0x00 {
163					result.push(0xff);
164				} else if next_byte == 0xff {
165					break;
166				} else {
167					return Err(Error::from(TypeError::SerdeKeycode {
168						message: format!(
169							"invalid escape sequence at position {}: 0xff 0x{:02x}",
170							self.position - 1,
171							next_byte
172						),
173					}));
174				}
175			} else {
176				result.push(byte);
177			}
178		}
179		Ok(result)
180	}
181
182	pub fn read_str(&mut self) -> Result<String> {
183		let bytes = self.read_bytes()?;
184		String::from_utf8(bytes).map_err(|e| {
185			Error::from(TypeError::SerdeKeycode {
186				message: format!("invalid UTF-8 in key at position {}: {}", self.position, e),
187			})
188		})
189	}
190
191	pub fn read_primitive_id(&mut self) -> Result<PrimitiveId> {
192		let bytes = self.read_exact(9)?;
193		catalog::deserialize_primitive_id(bytes)
194	}
195
196	pub fn read_index_id(&mut self) -> Result<IndexId> {
197		let bytes = self.read_exact(9)?;
198		catalog::deserialize_index_id(bytes)
199	}
200
201	pub fn read_date(&mut self) -> Result<Date> {
202		let days = self.read_i32()?;
203		Date::from_days_since_epoch(days).ok_or_else(|| {
204			Error::from(TypeError::SerdeKeycode {
205				message: format!(
206					"invalid date at position {}: {} days since epoch",
207					self.position, days
208				),
209			})
210		})
211	}
212
213	pub fn read_datetime(&mut self) -> Result<DateTime> {
214		let nanos = self.read_i64()?;
215		Ok(DateTime::from_nanos_since_epoch(nanos))
216	}
217
218	pub fn read_time(&mut self) -> Result<Time> {
219		let nanos = self.read_u64()?;
220		Time::from_nanos_since_midnight(nanos).ok_or_else(|| {
221			Error::from(TypeError::SerdeKeycode {
222				message: format!(
223					"invalid time at position {}: {} nanos since midnight",
224					self.position, nanos
225				),
226			})
227		})
228	}
229
230	pub fn read_duration(&mut self) -> Result<Duration> {
231		let nanos = self.read_i64()?;
232		Ok(Duration::from_nanoseconds(nanos))
233	}
234
235	pub fn read_row_number(&mut self) -> Result<RowNumber> {
236		let value = self.read_u64()?;
237		Ok(RowNumber(value))
238	}
239
240	pub fn read_identity_id(&mut self) -> Result<IdentityId> {
241		let bytes = self.read_bytes()?;
242		let uuid = Uuid::from_slice(&bytes).map_err(|e| {
243			Error::from(TypeError::SerdeKeycode {
244				message: format!("invalid IdentityId at position {}: {}", self.position, e),
245			})
246		})?;
247		Ok(IdentityId::from(Uuid7::from(uuid)))
248	}
249
250	pub fn read_uuid4(&mut self) -> Result<Uuid4> {
251		let bytes = self.read_bytes()?;
252		let uuid = Uuid::from_slice(&bytes).map_err(|e| {
253			Error::from(TypeError::SerdeKeycode {
254				message: format!("invalid Uuid4 at position {}: {}", self.position, e),
255			})
256		})?;
257		Ok(Uuid4::from(uuid))
258	}
259
260	pub fn read_uuid7(&mut self) -> Result<Uuid7> {
261		let bytes = self.read_bytes()?;
262		let uuid = Uuid::from_slice(&bytes).map_err(|e| {
263			Error::from(TypeError::SerdeKeycode {
264				message: format!("invalid Uuid7 at position {}: {}", self.position, e),
265			})
266		})?;
267		Ok(Uuid7::from(uuid))
268	}
269
270	pub fn read_blob(&mut self) -> Result<Blob> {
271		let bytes = self.read_bytes()?;
272		Ok(Blob::from(bytes))
273	}
274
275	pub fn read_int(&mut self) -> Result<Int> {
276		let sign = self.read_exact(1)?[0];
277		let len = self.read_u32()? as usize;
278		let bytes = self.read_exact(len)?;
279
280		let sign = match sign {
281			0 => Sign::Minus,
282			_ => Sign::Plus,
283		};
284
285		Ok(Int(BigInt::from_bytes_be(sign, bytes)))
286	}
287
288	pub fn read_uint(&mut self) -> Result<Uint> {
289		let len = self.read_u32()? as usize;
290		let bytes = self.read_exact(len)?;
291		Ok(Uint(BigInt::from_bytes_be(Sign::Plus, bytes)))
292	}
293
294	pub fn read_decimal(&mut self) -> Result<Decimal> {
295		let s = self.read_str()?;
296		s.parse::<Decimal>().map_err(|e| {
297			Error::from(TypeError::SerdeKeycode {
298				message: format!("invalid Decimal at position {}: {}", self.position, e),
299			})
300		})
301	}
302
303	pub fn read_value(&mut self) -> Result<Value> {
304		if self.remaining() < 1 {
305			return Err(Error::from(TypeError::SerdeKeycode {
306				message: format!(
307					"unexpected end of key at position {}: cannot read value type",
308					self.position
309				),
310			}));
311		}
312
313		let type_marker = self.buffer[self.position];
314		self.position += 1;
315
316		match type_marker {
317			0x00 => {
318				if self.remaining() > 0 && self.buffer[self.position] == 0x00 {
319					Ok(Value::Boolean(true))
320				} else {
321					Ok(Value::none())
322				}
323			}
324			0x01 => {
325				let b = self.read_bool()?;
326				Ok(Value::Boolean(b))
327			}
328			0x02 => {
329				let f = self.read_f32()?;
330				Ok(Value::Float4(OrderedF32::try_from(f).map_err(|e| {
331					Error::from(TypeError::SerdeKeycode {
332						message: format!("invalid f32 at position {}: {}", self.position, e),
333					})
334				})?))
335			}
336			0x03 => {
337				let f = self.read_f64()?;
338				Ok(Value::Float8(OrderedF64::try_from(f).map_err(|e| {
339					Error::from(TypeError::SerdeKeycode {
340						message: format!("invalid f64 at position {}: {}", self.position, e),
341					})
342				})?))
343			}
344			0x04 => {
345				let i = self.read_i8()?;
346				Ok(Value::Int1(i))
347			}
348			0x05 => {
349				let i = self.read_i16()?;
350				Ok(Value::Int2(i))
351			}
352			0x06 => {
353				let i = self.read_i32()?;
354				Ok(Value::Int4(i))
355			}
356			0x07 => {
357				let i = self.read_i64()?;
358				Ok(Value::Int8(i))
359			}
360			0x08 => {
361				let i = self.read_i128()?;
362				Ok(Value::Int16(i))
363			}
364			0x09 => {
365				let s = self.read_str()?;
366				Ok(Value::Utf8(s))
367			}
368			0x0a => {
369				let u = self.read_u8()?;
370				Ok(Value::Uint1(u))
371			}
372			0x0b => {
373				let u = self.read_u16()?;
374				Ok(Value::Uint2(u))
375			}
376			0x0c => {
377				let u = self.read_u32()?;
378				Ok(Value::Uint4(u))
379			}
380			0x0d => {
381				let u = self.read_u64()?;
382				Ok(Value::Uint8(u))
383			}
384			0x0e => {
385				let u = self.read_u128()?;
386				Ok(Value::Uint16(u))
387			}
388			0x0f => {
389				let d = self.read_date()?;
390				Ok(Value::Date(d))
391			}
392			0x10 => {
393				let dt = self.read_datetime()?;
394				Ok(Value::DateTime(dt))
395			}
396			0x11 => {
397				let t = self.read_time()?;
398				Ok(Value::Time(t))
399			}
400			0x12 => {
401				let i = self.read_duration()?;
402				Ok(Value::Duration(i))
403			}
404			// 0x13 was RowNumber, now reserved
405			0x13 => panic!("Type code 0x13 (RowNumber) is no longer supported"),
406			0x14 => {
407				let id = self.read_identity_id()?;
408				Ok(Value::IdentityId(id))
409			}
410			0x15 => {
411				let u = self.read_uuid4()?;
412				Ok(Value::Uuid4(u))
413			}
414			0x16 => {
415				let u = self.read_uuid7()?;
416				Ok(Value::Uuid7(u))
417			}
418			0x17 => {
419				let b = self.read_blob()?;
420				Ok(Value::Blob(b))
421			}
422			0x18 => {
423				let i = self.read_int()?;
424				Ok(Value::Int(i))
425			}
426			0x19 => {
427				let u = self.read_uint()?;
428				Ok(Value::Uint(u))
429			}
430			0x1a => {
431				let d = self.read_decimal()?;
432				Ok(Value::Decimal(d))
433			}
434			_ => Err(Error::from(TypeError::SerdeKeycode {
435				message: format!(
436					"unknown value type marker 0x{:02x} at position {}",
437					type_marker,
438					self.position - 1
439				),
440			})),
441		}
442	}
443
444	pub fn read_raw(&mut self, count: usize) -> Result<&'a [u8]> {
445		self.read_exact(count)
446	}
447}
448
#[cfg(test)]
pub mod tests {
	//! Tests for `KeyDeserializer`: round-trips through `KeySerializer`
	//! plus raw-byte checks of cursor tracking and error paths.

	use std::f64::consts::E;

	use reifydb_type::value::{
		date::Date, datetime::DateTime, duration::Duration, row_number::RowNumber, time::Time,
	};

	use crate::{
		interface::catalog::{id::IndexId, primitive::PrimitiveId},
		util::encoding::keycode::{deserializer::KeyDeserializer, serializer::KeySerializer},
	};

	/// Booleans written back-to-back are read back in order.
	#[test]
	fn test_read_bool() {
		let mut ser = KeySerializer::new();
		ser.extend_bool(true).extend_bool(false);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert!(de.read_bool().unwrap());
		assert!(!de.read_bool().unwrap());
		assert!(de.is_empty());
	}

	/// Signed integers of each fixed width round-trip.
	#[test]
	fn test_read_integers() {
		let mut ser = KeySerializer::new();
		ser.extend_i8(-42i8).extend_i16(-1000i16).extend_i32(100000i32).extend_i64(-1000000000i64);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_i8().unwrap(), -42);
		assert_eq!(de.read_i16().unwrap(), -1000);
		assert_eq!(de.read_i32().unwrap(), 100000);
		assert_eq!(de.read_i64().unwrap(), -1000000000);
		assert!(de.is_empty());
	}

	/// Unsigned maxima of each fixed width round-trip.
	#[test]
	fn test_read_unsigned() {
		let mut ser = KeySerializer::new();
		ser.extend_u8(255u8).extend_u16(65535u16).extend_u32(4294967295u32).extend_u64(18446744073709551615u64);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_u8().unwrap(), 255);
		assert_eq!(de.read_u16().unwrap(), 65535);
		assert_eq!(de.read_u32().unwrap(), 4294967295);
		assert_eq!(de.read_u64().unwrap(), 18446744073709551615);
		assert!(de.is_empty());
	}

	/// Floats round-trip to within a small tolerance.
	#[test]
	fn test_read_floats() {
		let mut ser = KeySerializer::new();
		ser.extend_f32(3.14).extend_f64(E);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert!((de.read_f32().unwrap() - 3.14).abs() < 0.001);
		assert!((de.read_f64().unwrap() - E).abs() < 0.000001);
		assert!(de.is_empty());
	}

	/// Byte strings round-trip, including one containing 0xff.
	#[test]
	fn test_read_bytes() {
		let mut ser = KeySerializer::new();
		ser.extend_bytes(b"hello").extend_bytes(&[0x01, 0xff, 0x02]);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_bytes().unwrap(), b"hello");
		assert_eq!(de.read_bytes().unwrap(), vec![0x01, 0xff, 0x02]);
		assert!(de.is_empty());
	}

	/// The escape scheme is decoded from hand-written bytes: `0xff 0x00` is
	/// a literal 0xff and `0xff 0xff` terminates the string.
	#[test]
	fn test_read_bytes_raw_escapes() {
		let empty = [0xffu8, 0xff];
		let mut de = KeyDeserializer::from_bytes(&empty);
		assert_eq!(de.read_bytes().unwrap(), Vec::<u8>::new());
		assert!(de.is_empty());

		let escaped = [0x01u8, 0xff, 0x00, 0x02, 0xff, 0xff];
		let mut de = KeyDeserializer::from_bytes(&escaped);
		assert_eq!(de.read_bytes().unwrap(), vec![0x01, 0xff, 0x02]);
		assert!(de.is_empty());
	}

	/// Malformed byte strings are reported as errors, not panics.
	#[test]
	fn test_read_bytes_rejects_malformed_input() {
		// No terminator at all.
		let unterminated = [0x61u8];
		assert!(KeyDeserializer::from_bytes(&unterminated).read_bytes().is_err());

		// Escape introducer with nothing after it.
		let dangling = [0xffu8];
		assert!(KeyDeserializer::from_bytes(&dangling).read_bytes().is_err());

		// 0xff must be followed by 0x00 or 0xff.
		let bad_escape = [0xffu8, 0x01];
		assert!(KeyDeserializer::from_bytes(&bad_escape).read_bytes().is_err());

		// Empty input.
		let nothing: [u8; 0] = [];
		assert!(KeyDeserializer::from_bytes(&nothing).read_bytes().is_err());
	}

	/// Strings round-trip, including multi-byte UTF-8.
	#[test]
	fn test_read_str() {
		let mut ser = KeySerializer::new();
		ser.extend_str("hello world").extend_str("👋");
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_str().unwrap(), "hello world");
		assert_eq!(de.read_str().unwrap(), "👋");
		assert!(de.is_empty());
	}

	/// A calendar date round-trips.
	#[test]
	fn test_read_date() {
		let mut ser = KeySerializer::new();
		let date = Date::from_ymd(2024, 1, 1).unwrap();
		ser.extend_date(&date);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_date().unwrap(), date);
		assert!(de.is_empty());
	}

	/// A date-time round-trips.
	#[test]
	fn test_read_datetime() {
		let mut ser = KeySerializer::new();
		let datetime = DateTime::from_ymd_hms(2024, 1, 1, 12, 30, 45).unwrap();
		ser.extend_datetime(&datetime);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_datetime().unwrap(), datetime);
		assert!(de.is_empty());
	}

	/// A time of day round-trips.
	#[test]
	fn test_read_time() {
		let mut ser = KeySerializer::new();
		let time = Time::from_hms(12, 30, 45).unwrap();
		ser.extend_time(&time);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_time().unwrap(), time);
		assert!(de.is_empty());
	}

	/// A duration round-trips.
	#[test]
	fn test_read_duration() {
		let mut ser = KeySerializer::new();
		let duration = Duration::from_nanoseconds(1000000);
		ser.extend_duration(&duration);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_duration().unwrap(), duration);
		assert!(de.is_empty());
	}

	/// A row number round-trips.
	#[test]
	fn test_read_row_number() {
		let mut ser = KeySerializer::new();
		let row = RowNumber(42);
		ser.extend_row_number(&row);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_row_number().unwrap(), row);
		assert!(de.is_empty());
	}

	/// A primitive id round-trips.
	#[test]
	fn test_read_primitive_id() {
		let mut ser = KeySerializer::new();
		let primitive = PrimitiveId::table(42);
		ser.extend_primitive_id(primitive);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_primitive_id().unwrap(), primitive);
		assert!(de.is_empty());
	}

	/// An index id round-trips.
	#[test]
	fn test_read_index_id() {
		let mut ser = KeySerializer::new();
		let index = IndexId::primary(999);
		ser.extend_index_id(index);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_index_id().unwrap(), index);
		assert!(de.is_empty());
	}

	/// `position` and `remaining` track each successful read.
	#[test]
	fn test_position_tracking() {
		let mut ser = KeySerializer::new();
		ser.extend_u8(1u8).extend_u16(2u16).extend_u32(3u32);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.position(), 0);
		assert_eq!(de.remaining(), 7);

		de.read_u8().unwrap();
		assert_eq!(de.position(), 1);
		assert_eq!(de.remaining(), 6);

		de.read_u16().unwrap();
		assert_eq!(de.position(), 3);
		assert_eq!(de.remaining(), 4);

		de.read_u32().unwrap();
		assert_eq!(de.position(), 7);
		assert_eq!(de.remaining(), 0);
		assert!(de.is_empty());
	}

	/// A fixed-width read past the end of the buffer is an error.
	#[test]
	fn test_error_on_insufficient_bytes() {
		let bytes = vec![0x00, 0x01];
		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert!(de.read_u32().is_err());
	}

	/// `read_raw` hands back undecoded bytes and leaves the cursor alone
	/// when the request cannot be satisfied.
	#[test]
	fn test_read_raw() {
		let bytes = [1u8, 2, 3];
		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_raw(2).unwrap(), &[1u8, 2][..]);
		assert_eq!(de.position(), 2);
		assert_eq!(de.remaining(), 1);

		assert!(de.read_raw(2).is_err());
		assert_eq!(de.position(), 2);
	}

	/// Mixed value kinds written in sequence are read back in sequence.
	#[test]
	fn test_chaining() {
		let mut ser = KeySerializer::new();
		ser.extend_bool(true).extend_i32(42i32).extend_str("test").extend_u64(1000u64);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert!(de.read_bool().unwrap());
		assert_eq!(de.read_i32().unwrap(), 42);
		assert_eq!(de.read_str().unwrap(), "test");
		assert_eq!(de.read_u64().unwrap(), 1000);
		assert!(de.is_empty());
	}
}