Skip to main content

reifydb_core/util/encoding/keycode/
deserializer.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright (c) 2025 ReifyDB
3
4use num_bigint::Sign;
5use reifydb_type::{
6	error,
7	error::diagnostic::serde::serde_keycode_error,
8	value::{
9		blob::Blob,
10		date::Date,
11		datetime::DateTime,
12		decimal::Decimal,
13		duration::Duration,
14		identity::IdentityId,
15		int::Int,
16		ordered_f32::OrderedF32,
17		ordered_f64::OrderedF64,
18		row_number::RowNumber,
19		time::Time,
20		uint::Uint,
21		uuid::{Uuid4, Uuid7},
22	},
23};
24
25use super::{catalog, deserialize};
26use crate::interface::catalog::{id::IndexId, primitive::PrimitiveId};
27
/// Cursor over a keycode-encoded byte slice.
///
/// Borrows the input for `'a` and remembers the offset of the next byte to
/// read; every successful read advances that offset.
#[derive(Debug, Clone)]
pub struct KeyDeserializer<'a> {
	/// Encoded key bytes being read.
	buffer: &'a [u8],
	/// Offset into `buffer` of the next unread byte.
	position: usize,
}
32
33impl<'a> KeyDeserializer<'a> {
34	pub fn from_bytes(buffer: &'a [u8]) -> Self {
35		Self {
36			buffer,
37			position: 0,
38		}
39	}
40
41	pub fn remaining(&self) -> usize {
42		self.buffer.len().saturating_sub(self.position)
43	}
44
45	pub fn is_empty(&self) -> bool {
46		self.remaining() == 0
47	}
48
49	pub fn position(&self) -> usize {
50		self.position
51	}
52
53	fn read_exact(&mut self, count: usize) -> reifydb_type::Result<&'a [u8]> {
54		if self.remaining() < count {
55			return Err(error!(serde_keycode_error(format!(
56				"unexpected end of key at position {}: need {} bytes, have {}",
57				self.position,
58				count,
59				self.remaining()
60			))));
61		}
62		let start = self.position;
63		self.position += count;
64		Ok(&self.buffer[start..self.position])
65	}
66
67	pub fn read_bool(&mut self) -> reifydb_type::Result<bool> {
68		let bytes = self.read_exact(1)?;
69		Ok(deserialize::<bool>(bytes)?)
70	}
71
72	pub fn read_f32(&mut self) -> reifydb_type::Result<f32> {
73		let bytes = self.read_exact(4)?;
74		Ok(deserialize::<f32>(bytes)?)
75	}
76
77	pub fn read_f64(&mut self) -> reifydb_type::Result<f64> {
78		let bytes = self.read_exact(8)?;
79		Ok(deserialize::<f64>(bytes)?)
80	}
81
82	pub fn read_i8(&mut self) -> reifydb_type::Result<i8> {
83		let bytes = self.read_exact(1)?;
84		Ok(deserialize::<i8>(bytes)?)
85	}
86
87	pub fn read_i16(&mut self) -> reifydb_type::Result<i16> {
88		let bytes = self.read_exact(2)?;
89		Ok(deserialize::<i16>(bytes)?)
90	}
91
92	pub fn read_i32(&mut self) -> reifydb_type::Result<i32> {
93		let bytes = self.read_exact(4)?;
94		Ok(deserialize::<i32>(bytes)?)
95	}
96
97	pub fn read_i64(&mut self) -> reifydb_type::Result<i64> {
98		let bytes = self.read_exact(8)?;
99		Ok(deserialize::<i64>(bytes)?)
100	}
101
102	pub fn read_i128(&mut self) -> reifydb_type::Result<i128> {
103		let bytes = self.read_exact(16)?;
104		Ok(deserialize::<i128>(bytes)?)
105	}
106
107	pub fn read_u8(&mut self) -> reifydb_type::Result<u8> {
108		let bytes = self.read_exact(1)?;
109		Ok(deserialize::<u8>(bytes)?)
110	}
111
112	pub fn read_u16(&mut self) -> reifydb_type::Result<u16> {
113		let bytes = self.read_exact(2)?;
114		Ok(deserialize::<u16>(bytes)?)
115	}
116
117	pub fn read_u32(&mut self) -> reifydb_type::Result<u32> {
118		let bytes = self.read_exact(4)?;
119		Ok(deserialize::<u32>(bytes)?)
120	}
121
122	pub fn read_u64(&mut self) -> reifydb_type::Result<u64> {
123		let bytes = self.read_exact(8)?;
124		Ok(deserialize::<u64>(bytes)?)
125	}
126
127	pub fn read_u128(&mut self) -> reifydb_type::Result<u128> {
128		let bytes = self.read_exact(16)?;
129		Ok(deserialize::<u128>(bytes)?)
130	}
131
132	pub fn read_bytes(&mut self) -> reifydb_type::Result<Vec<u8>> {
133		let mut result = Vec::new();
134		loop {
135			if self.remaining() < 1 {
136				return Err(error!(serde_keycode_error(format!(
137					"unexpected end of key at position {}: bytes not terminated",
138					self.position
139				))));
140			}
141			let byte = self.buffer[self.position];
142			self.position += 1;
143
144			if byte == 0xff {
145				if self.remaining() < 1 {
146					return Err(error!(serde_keycode_error(format!(
147						"unexpected end of key at position {}: incomplete escape sequence",
148						self.position
149					))));
150				}
151				let next_byte = self.buffer[self.position];
152				self.position += 1;
153
154				if next_byte == 0x00 {
155					result.push(0xff);
156				} else if next_byte == 0xff {
157					break;
158				} else {
159					return Err(error!(serde_keycode_error(format!(
160						"invalid escape sequence at position {}: 0xff 0x{:02x}",
161						self.position - 1,
162						next_byte
163					))));
164				}
165			} else {
166				result.push(byte);
167			}
168		}
169		Ok(result)
170	}
171
172	pub fn read_str(&mut self) -> reifydb_type::Result<String> {
173		let bytes = self.read_bytes()?;
174		String::from_utf8(bytes).map_err(|e| {
175			error!(serde_keycode_error(format!(
176				"invalid UTF-8 in key at position {}: {}",
177				self.position, e
178			)))
179		})
180	}
181
182	pub fn read_primitive_id(&mut self) -> reifydb_type::Result<PrimitiveId> {
183		let bytes = self.read_exact(9)?;
184		catalog::deserialize_primitive_id(bytes)
185	}
186
187	pub fn read_index_id(&mut self) -> reifydb_type::Result<IndexId> {
188		let bytes = self.read_exact(9)?;
189		catalog::deserialize_index_id(bytes)
190	}
191
192	pub fn read_date(&mut self) -> reifydb_type::Result<Date> {
193		let days = self.read_i32()?;
194		Date::from_days_since_epoch(days).ok_or_else(|| {
195			error!(serde_keycode_error(format!(
196				"invalid date at position {}: {} days since epoch",
197				self.position, days
198			)))
199		})
200	}
201
202	pub fn read_datetime(&mut self) -> reifydb_type::Result<DateTime> {
203		let nanos = self.read_i64()?;
204		Ok(DateTime::from_nanos_since_epoch(nanos))
205	}
206
207	pub fn read_time(&mut self) -> reifydb_type::Result<Time> {
208		let nanos = self.read_u64()?;
209		Time::from_nanos_since_midnight(nanos).ok_or_else(|| {
210			error!(serde_keycode_error(format!(
211				"invalid time at position {}: {} nanos since midnight",
212				self.position, nanos
213			)))
214		})
215	}
216
217	pub fn read_duration(&mut self) -> reifydb_type::Result<Duration> {
218		let nanos = self.read_i64()?;
219		Ok(Duration::from_nanoseconds(nanos))
220	}
221
222	pub fn read_row_number(&mut self) -> reifydb_type::Result<RowNumber> {
223		let value = self.read_u64()?;
224		Ok(RowNumber(value))
225	}
226
227	pub fn read_identity_id(&mut self) -> reifydb_type::Result<IdentityId> {
228		let bytes = self.read_bytes()?;
229		let uuid = uuid::Uuid::from_slice(&bytes).map_err(|e| {
230			error!(serde_keycode_error(format!("invalid IdentityId at position {}: {}", self.position, e)))
231		})?;
232		Ok(IdentityId::from(Uuid7::from(uuid)))
233	}
234
235	pub fn read_uuid4(&mut self) -> reifydb_type::Result<Uuid4> {
236		let bytes = self.read_bytes()?;
237		let uuid = uuid::Uuid::from_slice(&bytes).map_err(|e| {
238			error!(serde_keycode_error(format!("invalid Uuid4 at position {}: {}", self.position, e)))
239		})?;
240		Ok(Uuid4::from(uuid))
241	}
242
243	pub fn read_uuid7(&mut self) -> reifydb_type::Result<Uuid7> {
244		let bytes = self.read_bytes()?;
245		let uuid = uuid::Uuid::from_slice(&bytes).map_err(|e| {
246			error!(serde_keycode_error(format!("invalid Uuid7 at position {}: {}", self.position, e)))
247		})?;
248		Ok(Uuid7::from(uuid))
249	}
250
251	pub fn read_blob(&mut self) -> reifydb_type::Result<Blob> {
252		let bytes = self.read_bytes()?;
253		Ok(Blob::from(bytes))
254	}
255
256	pub fn read_int(&mut self) -> reifydb_type::Result<Int> {
257		let sign = self.read_exact(1)?[0];
258		let len = self.read_u32()? as usize;
259		let bytes = self.read_exact(len)?;
260
261		let sign = match sign {
262			0 => Sign::Minus,
263			_ => Sign::Plus,
264		};
265
266		Ok(Int(num_bigint::BigInt::from_bytes_be(sign, bytes)))
267	}
268
269	pub fn read_uint(&mut self) -> reifydb_type::Result<Uint> {
270		let len = self.read_u32()? as usize;
271		let bytes = self.read_exact(len)?;
272		Ok(Uint(num_bigint::BigInt::from_bytes_be(Sign::Plus, bytes)))
273	}
274
275	pub fn read_decimal(&mut self) -> reifydb_type::Result<Decimal> {
276		let s = self.read_str()?;
277		s.parse::<Decimal>().map_err(|e| {
278			error!(serde_keycode_error(format!("invalid Decimal at position {}: {}", self.position, e)))
279		})
280	}
281
282	pub fn read_value(&mut self) -> reifydb_type::Result<reifydb_type::value::Value> {
283		use reifydb_type::value::Value;
284
285		if self.remaining() < 1 {
286			return Err(error!(serde_keycode_error(format!(
287				"unexpected end of key at position {}: cannot read value type",
288				self.position
289			))));
290		}
291
292		let type_marker = self.buffer[self.position];
293		self.position += 1;
294
295		match type_marker {
296			0x00 => {
297				if self.remaining() > 0 && self.buffer[self.position] == 0x00 {
298					Ok(Value::Boolean(true))
299				} else {
300					Ok(Value::none())
301				}
302			}
303			0x01 => {
304				let b = self.read_bool()?;
305				Ok(Value::Boolean(b))
306			}
307			0x02 => {
308				let f = self.read_f32()?;
309				Ok(Value::Float4(OrderedF32::try_from(f).map_err(|e| {
310					error!(serde_keycode_error(format!(
311						"invalid f32 at position {}: {}",
312						self.position, e
313					)))
314				})?))
315			}
316			0x03 => {
317				let f = self.read_f64()?;
318				Ok(Value::Float8(OrderedF64::try_from(f).map_err(|e| {
319					error!(serde_keycode_error(format!(
320						"invalid f64 at position {}: {}",
321						self.position, e
322					)))
323				})?))
324			}
325			0x04 => {
326				let i = self.read_i8()?;
327				Ok(Value::Int1(i))
328			}
329			0x05 => {
330				let i = self.read_i16()?;
331				Ok(Value::Int2(i))
332			}
333			0x06 => {
334				let i = self.read_i32()?;
335				Ok(Value::Int4(i))
336			}
337			0x07 => {
338				let i = self.read_i64()?;
339				Ok(Value::Int8(i))
340			}
341			0x08 => {
342				let i = self.read_i128()?;
343				Ok(Value::Int16(i))
344			}
345			0x09 => {
346				let s = self.read_str()?;
347				Ok(Value::Utf8(s))
348			}
349			0x0a => {
350				let u = self.read_u8()?;
351				Ok(Value::Uint1(u))
352			}
353			0x0b => {
354				let u = self.read_u16()?;
355				Ok(Value::Uint2(u))
356			}
357			0x0c => {
358				let u = self.read_u32()?;
359				Ok(Value::Uint4(u))
360			}
361			0x0d => {
362				let u = self.read_u64()?;
363				Ok(Value::Uint8(u))
364			}
365			0x0e => {
366				let u = self.read_u128()?;
367				Ok(Value::Uint16(u))
368			}
369			0x0f => {
370				let d = self.read_date()?;
371				Ok(Value::Date(d))
372			}
373			0x10 => {
374				let dt = self.read_datetime()?;
375				Ok(Value::DateTime(dt))
376			}
377			0x11 => {
378				let t = self.read_time()?;
379				Ok(Value::Time(t))
380			}
381			0x12 => {
382				let i = self.read_duration()?;
383				Ok(Value::Duration(i))
384			}
385			// 0x13 was RowNumber, now reserved
386			0x13 => panic!("Type code 0x13 (RowNumber) is no longer supported"),
387			0x14 => {
388				let id = self.read_identity_id()?;
389				Ok(Value::IdentityId(id))
390			}
391			0x15 => {
392				let u = self.read_uuid4()?;
393				Ok(Value::Uuid4(u))
394			}
395			0x16 => {
396				let u = self.read_uuid7()?;
397				Ok(Value::Uuid7(u))
398			}
399			0x17 => {
400				let b = self.read_blob()?;
401				Ok(Value::Blob(b))
402			}
403			0x18 => {
404				let i = self.read_int()?;
405				Ok(Value::Int(i))
406			}
407			0x19 => {
408				let u = self.read_uint()?;
409				Ok(Value::Uint(u))
410			}
411			0x1a => {
412				let d = self.read_decimal()?;
413				Ok(Value::Decimal(d))
414			}
415			_ => Err(error!(serde_keycode_error(format!(
416				"unknown value type marker 0x{:02x} at position {}",
417				type_marker,
418				self.position - 1
419			)))),
420		}
421	}
422
423	pub fn read_raw(&mut self, count: usize) -> reifydb_type::Result<&'a [u8]> {
424		self.read_exact(count)
425	}
426}
427
#[cfg(test)]
pub mod tests {
	use std::f64::consts::E;

	use reifydb_type::value::{
		date::Date, datetime::DateTime, duration::Duration, row_number::RowNumber, time::Time,
	};

	use crate::{
		interface::catalog::{id::IndexId, primitive::PrimitiveId},
		util::encoding::keycode::{deserializer::KeyDeserializer, serializer::KeySerializer},
	};

	/// Booleans written back-to-back are read back in the same order.
	#[test]
	fn test_read_bool() {
		let mut writer = KeySerializer::new();
		writer.extend_bool(true).extend_bool(false);
		let encoded = writer.finish();

		let mut reader = KeyDeserializer::from_bytes(&encoded);
		assert!(reader.read_bool().unwrap());
		assert!(!reader.read_bool().unwrap());
		assert!(reader.is_empty());
	}

	/// Signed integers of every fixed width up to 64 bits round-trip.
	#[test]
	fn test_read_integers() {
		let mut writer = KeySerializer::new();
		writer.extend_i8(-42i8).extend_i16(-1000i16).extend_i32(100000i32).extend_i64(-1000000000i64);
		let encoded = writer.finish();

		let mut reader = KeyDeserializer::from_bytes(&encoded);
		assert_eq!(reader.read_i8().unwrap(), -42i8);
		assert_eq!(reader.read_i16().unwrap(), -1000i16);
		assert_eq!(reader.read_i32().unwrap(), 100000i32);
		assert_eq!(reader.read_i64().unwrap(), -1000000000i64);
		assert!(reader.is_empty());
	}

	/// Maximum unsigned values of every fixed width up to 64 bits round-trip.
	#[test]
	fn test_read_unsigned() {
		let mut writer = KeySerializer::new();
		writer.extend_u8(255u8)
			.extend_u16(65535u16)
			.extend_u32(4294967295u32)
			.extend_u64(18446744073709551615u64);
		let encoded = writer.finish();

		let mut reader = KeyDeserializer::from_bytes(&encoded);
		assert_eq!(reader.read_u8().unwrap(), 255u8);
		assert_eq!(reader.read_u16().unwrap(), 65535u16);
		assert_eq!(reader.read_u32().unwrap(), 4294967295u32);
		assert_eq!(reader.read_u64().unwrap(), 18446744073709551615u64);
		assert!(reader.is_empty());
	}

	/// Floats round-trip to within a small tolerance.
	#[test]
	fn test_read_floats() {
		let mut writer = KeySerializer::new();
		writer.extend_f32(3.14).extend_f64(E);
		let encoded = writer.finish();

		let mut reader = KeyDeserializer::from_bytes(&encoded);
		let single = reader.read_f32().unwrap();
		assert!((single - 3.14).abs() < 0.001);
		let double = reader.read_f64().unwrap();
		assert!((double - E).abs() < 0.000001);
		assert!(reader.is_empty());
	}

	/// Byte strings round-trip, including one containing the 0xff escape byte.
	#[test]
	fn test_read_bytes() {
		let mut writer = KeySerializer::new();
		writer.extend_bytes(b"hello").extend_bytes(&[0x01, 0xff, 0x02]);
		let encoded = writer.finish();

		let mut reader = KeyDeserializer::from_bytes(&encoded);
		assert_eq!(reader.read_bytes().unwrap(), b"hello");
		assert_eq!(reader.read_bytes().unwrap(), vec![0x01, 0xff, 0x02]);
		assert!(reader.is_empty());
	}

	/// ASCII and multi-byte UTF-8 strings round-trip.
	#[test]
	fn test_read_str() {
		let mut writer = KeySerializer::new();
		writer.extend_str("hello world").extend_str("👋");
		let encoded = writer.finish();

		let mut reader = KeyDeserializer::from_bytes(&encoded);
		assert_eq!(reader.read_str().unwrap(), "hello world");
		assert_eq!(reader.read_str().unwrap(), "👋");
		assert!(reader.is_empty());
	}

	/// A calendar date round-trips.
	#[test]
	fn test_read_date() {
		let expected = Date::from_ymd(2024, 1, 1).unwrap();
		let mut writer = KeySerializer::new();
		writer.extend_date(&expected);
		let encoded = writer.finish();

		let mut reader = KeyDeserializer::from_bytes(&encoded);
		assert_eq!(reader.read_date().unwrap(), expected);
		assert!(reader.is_empty());
	}

	/// A date-time round-trips.
	#[test]
	fn test_read_datetime() {
		let expected = DateTime::from_ymd_hms(2024, 1, 1, 12, 30, 45).unwrap();
		let mut writer = KeySerializer::new();
		writer.extend_datetime(&expected);
		let encoded = writer.finish();

		let mut reader = KeyDeserializer::from_bytes(&encoded);
		assert_eq!(reader.read_datetime().unwrap(), expected);
		assert!(reader.is_empty());
	}

	/// A time of day round-trips.
	#[test]
	fn test_read_time() {
		let expected = Time::from_hms(12, 30, 45).unwrap();
		let mut writer = KeySerializer::new();
		writer.extend_time(&expected);
		let encoded = writer.finish();

		let mut reader = KeyDeserializer::from_bytes(&encoded);
		assert_eq!(reader.read_time().unwrap(), expected);
		assert!(reader.is_empty());
	}

	/// A duration round-trips.
	#[test]
	fn test_read_duration() {
		let expected = Duration::from_nanoseconds(1000000);
		let mut writer = KeySerializer::new();
		writer.extend_duration(&expected);
		let encoded = writer.finish();

		let mut reader = KeyDeserializer::from_bytes(&encoded);
		assert_eq!(reader.read_duration().unwrap(), expected);
		assert!(reader.is_empty());
	}

	/// A row number round-trips.
	#[test]
	fn test_read_row_number() {
		let expected = RowNumber(42);
		let mut writer = KeySerializer::new();
		writer.extend_row_number(&expected);
		let encoded = writer.finish();

		let mut reader = KeyDeserializer::from_bytes(&encoded);
		assert_eq!(reader.read_row_number().unwrap(), expected);
		assert!(reader.is_empty());
	}

	/// A table primitive id round-trips.
	#[test]
	fn test_read_primitive_id() {
		let expected = PrimitiveId::table(42);
		let mut writer = KeySerializer::new();
		writer.extend_primitive_id(expected);
		let encoded = writer.finish();

		let mut reader = KeyDeserializer::from_bytes(&encoded);
		assert_eq!(reader.read_primitive_id().unwrap(), expected);
		assert!(reader.is_empty());
	}

	/// A primary index id round-trips.
	#[test]
	fn test_read_index_id() {
		let expected = IndexId::primary(999);
		let mut writer = KeySerializer::new();
		writer.extend_index_id(expected);
		let encoded = writer.finish();

		let mut reader = KeyDeserializer::from_bytes(&encoded);
		assert_eq!(reader.read_index_id().unwrap(), expected);
		assert!(reader.is_empty());
	}

	/// `position` and `remaining` move in step with each read.
	#[test]
	fn test_position_tracking() {
		let mut writer = KeySerializer::new();
		writer.extend_u8(1u8).extend_u16(2u16).extend_u32(3u32);
		let encoded = writer.finish();

		let mut reader = KeyDeserializer::from_bytes(&encoded);
		assert_eq!(reader.position(), 0);
		assert_eq!(reader.remaining(), 7);

		reader.read_u8().unwrap();
		assert_eq!(reader.position(), 1);
		assert_eq!(reader.remaining(), 6);

		reader.read_u16().unwrap();
		assert_eq!(reader.position(), 3);
		assert_eq!(reader.remaining(), 4);

		reader.read_u32().unwrap();
		assert_eq!(reader.position(), 7);
		assert_eq!(reader.remaining(), 0);
		assert!(reader.is_empty());
	}

	/// Asking for more bytes than the buffer holds is an error, not a panic.
	#[test]
	fn test_error_on_insufficient_bytes() {
		let truncated = vec![0x00, 0x01];
		let mut reader = KeyDeserializer::from_bytes(&truncated);
		let outcome = reader.read_u32();
		assert!(outcome.is_err());
	}

	/// Values of mixed types written in one chain are read back in order.
	#[test]
	fn test_chaining() {
		let mut writer = KeySerializer::new();
		writer.extend_bool(true).extend_i32(42i32).extend_str("test").extend_u64(1000u64);
		let encoded = writer.finish();

		let mut reader = KeyDeserializer::from_bytes(&encoded);
		assert!(reader.read_bool().unwrap());
		assert_eq!(reader.read_i32().unwrap(), 42i32);
		assert_eq!(reader.read_str().unwrap(), "test");
		assert_eq!(reader.read_u64().unwrap(), 1000u64);
		assert!(reader.is_empty());
	}
}