reifydb_core/util/encoding/keycode/
deserializer.rs

1// Copyright (c) reifydb.com 2025
2// This file is licensed under the AGPL-3.0-or-later, see license.md file
3
4use num_bigint::Sign;
5use reifydb_type::{
6	Blob, Date, DateTime, Decimal, Duration, IdentityId, Int, OrderedF32, RowNumber, Time, Uint, Uuid4, Uuid7,
7	diagnostic::serde::serde_keycode_error,
8};
9
10use super::{catalog, deserialize};
11use crate::{
12	Result, error,
13	interface::{IndexId, SourceId},
14};
15
/// Cursor over a borrowed, keycode-encoded byte slice.
///
/// The deserializer never copies the input: it keeps a reference to the
/// caller's buffer together with the offset of the next unread byte.
#[derive(Debug, Clone)]
pub struct KeyDeserializer<'a> {
	/// The complete encoded key being read.
	buffer: &'a [u8],
	/// Offset into `buffer` of the next byte to consume.
	position: usize,
}
20
21impl<'a> KeyDeserializer<'a> {
22	pub fn from_bytes(buffer: &'a [u8]) -> Self {
23		Self {
24			buffer,
25			position: 0,
26		}
27	}
28
29	pub fn remaining(&self) -> usize {
30		self.buffer.len().saturating_sub(self.position)
31	}
32
33	pub fn is_empty(&self) -> bool {
34		self.remaining() == 0
35	}
36
37	pub fn position(&self) -> usize {
38		self.position
39	}
40
41	fn read_exact(&mut self, count: usize) -> Result<&'a [u8]> {
42		if self.remaining() < count {
43			return Err(error!(serde_keycode_error(format!(
44				"unexpected end of key at position {}: need {} bytes, have {}",
45				self.position,
46				count,
47				self.remaining()
48			))));
49		}
50		let start = self.position;
51		self.position += count;
52		Ok(&self.buffer[start..self.position])
53	}
54
55	pub fn read_bool(&mut self) -> Result<bool> {
56		let bytes = self.read_exact(1)?;
57		Ok(deserialize::<bool>(bytes)?)
58	}
59
60	pub fn read_f32(&mut self) -> Result<f32> {
61		let bytes = self.read_exact(4)?;
62		Ok(deserialize::<f32>(bytes)?)
63	}
64
65	pub fn read_f64(&mut self) -> Result<f64> {
66		let bytes = self.read_exact(8)?;
67		Ok(deserialize::<f64>(bytes)?)
68	}
69
70	pub fn read_i8(&mut self) -> Result<i8> {
71		let bytes = self.read_exact(1)?;
72		Ok(deserialize::<i8>(bytes)?)
73	}
74
75	pub fn read_i16(&mut self) -> Result<i16> {
76		let bytes = self.read_exact(2)?;
77		Ok(deserialize::<i16>(bytes)?)
78	}
79
80	pub fn read_i32(&mut self) -> Result<i32> {
81		let bytes = self.read_exact(4)?;
82		Ok(deserialize::<i32>(bytes)?)
83	}
84
85	pub fn read_i64(&mut self) -> Result<i64> {
86		let bytes = self.read_exact(8)?;
87		Ok(deserialize::<i64>(bytes)?)
88	}
89
90	pub fn read_i128(&mut self) -> Result<i128> {
91		let bytes = self.read_exact(16)?;
92		Ok(deserialize::<i128>(bytes)?)
93	}
94
95	pub fn read_u8(&mut self) -> Result<u8> {
96		let bytes = self.read_exact(1)?;
97		Ok(deserialize::<u8>(bytes)?)
98	}
99
100	pub fn read_u16(&mut self) -> Result<u16> {
101		let bytes = self.read_exact(2)?;
102		Ok(deserialize::<u16>(bytes)?)
103	}
104
105	pub fn read_u32(&mut self) -> Result<u32> {
106		let bytes = self.read_exact(4)?;
107		Ok(deserialize::<u32>(bytes)?)
108	}
109
110	pub fn read_u64(&mut self) -> Result<u64> {
111		let bytes = self.read_exact(8)?;
112		Ok(deserialize::<u64>(bytes)?)
113	}
114
115	pub fn read_u128(&mut self) -> Result<u128> {
116		let bytes = self.read_exact(16)?;
117		Ok(deserialize::<u128>(bytes)?)
118	}
119
120	pub fn read_bytes(&mut self) -> Result<Vec<u8>> {
121		let mut result = Vec::new();
122		loop {
123			if self.remaining() < 1 {
124				return Err(error!(serde_keycode_error(format!(
125					"unexpected end of key at position {}: bytes not terminated",
126					self.position
127				))));
128			}
129			let byte = self.buffer[self.position];
130			self.position += 1;
131
132			if byte == 0xff {
133				if self.remaining() < 1 {
134					return Err(error!(serde_keycode_error(format!(
135						"unexpected end of key at position {}: incomplete escape sequence",
136						self.position
137					))));
138				}
139				let next_byte = self.buffer[self.position];
140				self.position += 1;
141
142				if next_byte == 0x00 {
143					result.push(0xff);
144				} else if next_byte == 0xff {
145					break;
146				} else {
147					return Err(error!(serde_keycode_error(format!(
148						"invalid escape sequence at position {}: 0xff 0x{:02x}",
149						self.position - 1,
150						next_byte
151					))));
152				}
153			} else {
154				result.push(byte);
155			}
156		}
157		Ok(result)
158	}
159
160	pub fn read_str(&mut self) -> Result<String> {
161		let bytes = self.read_bytes()?;
162		String::from_utf8(bytes).map_err(|e| {
163			error!(serde_keycode_error(format!(
164				"invalid UTF-8 in key at position {}: {}",
165				self.position, e
166			)))
167		})
168	}
169
170	pub fn read_source_id(&mut self) -> Result<SourceId> {
171		let bytes = self.read_exact(9)?;
172		catalog::deserialize_source_id(bytes)
173	}
174
175	pub fn read_index_id(&mut self) -> Result<IndexId> {
176		let bytes = self.read_exact(9)?;
177		catalog::deserialize_index_id(bytes)
178	}
179
180	pub fn read_date(&mut self) -> Result<Date> {
181		let days = self.read_i32()?;
182		Date::from_days_since_epoch(days).ok_or_else(|| {
183			error!(serde_keycode_error(format!(
184				"invalid date at position {}: {} days since epoch",
185				self.position, days
186			)))
187		})
188	}
189
190	pub fn read_datetime(&mut self) -> Result<DateTime> {
191		let nanos = self.read_i64()?;
192		Ok(DateTime::from_nanos_since_epoch(nanos))
193	}
194
195	pub fn read_time(&mut self) -> Result<Time> {
196		let nanos = self.read_u64()?;
197		Time::from_nanos_since_midnight(nanos).ok_or_else(|| {
198			error!(serde_keycode_error(format!(
199				"invalid time at position {}: {} nanos since midnight",
200				self.position, nanos
201			)))
202		})
203	}
204
205	pub fn read_duration(&mut self) -> Result<Duration> {
206		let nanos = self.read_i64()?;
207		Ok(Duration::from_nanoseconds(nanos))
208	}
209
210	pub fn read_row_number(&mut self) -> Result<RowNumber> {
211		let value = self.read_u64()?;
212		Ok(RowNumber(value))
213	}
214
215	pub fn read_identity_id(&mut self) -> Result<IdentityId> {
216		let bytes = self.read_bytes()?;
217		let uuid = uuid::Uuid::from_slice(&bytes).map_err(|e| {
218			error!(serde_keycode_error(format!("invalid IdentityId at position {}: {}", self.position, e)))
219		})?;
220		Ok(IdentityId::from(Uuid7::from(uuid)))
221	}
222
223	pub fn read_uuid4(&mut self) -> Result<Uuid4> {
224		let bytes = self.read_bytes()?;
225		let uuid = uuid::Uuid::from_slice(&bytes).map_err(|e| {
226			error!(serde_keycode_error(format!("invalid Uuid4 at position {}: {}", self.position, e)))
227		})?;
228		Ok(Uuid4::from(uuid))
229	}
230
231	pub fn read_uuid7(&mut self) -> Result<Uuid7> {
232		let bytes = self.read_bytes()?;
233		let uuid = uuid::Uuid::from_slice(&bytes).map_err(|e| {
234			error!(serde_keycode_error(format!("invalid Uuid7 at position {}: {}", self.position, e)))
235		})?;
236		Ok(Uuid7::from(uuid))
237	}
238
239	pub fn read_blob(&mut self) -> Result<Blob> {
240		let bytes = self.read_bytes()?;
241		Ok(Blob::from(bytes))
242	}
243
244	pub fn read_int(&mut self) -> Result<Int> {
245		let sign = self.read_exact(1)?[0];
246		let len = self.read_u32()? as usize;
247		let bytes = self.read_exact(len)?;
248
249		let sign = match sign {
250			0 => Sign::Minus,
251			_ => Sign::Plus,
252		};
253
254		Ok(Int(num_bigint::BigInt::from_bytes_be(sign, bytes)))
255	}
256
257	pub fn read_uint(&mut self) -> Result<Uint> {
258		let len = self.read_u32()? as usize;
259		let bytes = self.read_exact(len)?;
260		Ok(Uint(num_bigint::BigInt::from_bytes_be(Sign::Plus, bytes)))
261	}
262
263	pub fn read_decimal(&mut self) -> Result<Decimal> {
264		let s = self.read_str()?;
265		s.parse::<Decimal>().map_err(|e| {
266			error!(serde_keycode_error(format!("invalid Decimal at position {}: {}", self.position, e)))
267		})
268	}
269
270	pub fn read_value(&mut self) -> Result<reifydb_type::Value> {
271		use reifydb_type::Value;
272
273		if self.remaining() < 1 {
274			return Err(error!(serde_keycode_error(format!(
275				"unexpected end of key at position {}: cannot read value type",
276				self.position
277			))));
278		}
279
280		let type_marker = self.buffer[self.position];
281		self.position += 1;
282
283		match type_marker {
284			0x00 => {
285				if self.remaining() > 0 && self.buffer[self.position] == 0x00 {
286					Ok(Value::Boolean(true))
287				} else {
288					Ok(Value::Undefined)
289				}
290			}
291			0x01 => {
292				let b = self.read_bool()?;
293				Ok(Value::Boolean(b))
294			}
295			0x02 => {
296				let f = self.read_f32()?;
297				Ok(Value::Float4(OrderedF32::try_from(f).map_err(|e| {
298					error!(reifydb_type::diagnostic::serde::serde_keycode_error(format!(
299						"invalid f32 at position {}: {}",
300						self.position, e
301					)))
302				})?))
303			}
304			0x03 => {
305				let f = self.read_f64()?;
306				Ok(Value::Float8(reifydb_type::OrderedF64::try_from(f).map_err(|e| {
307					error!(serde_keycode_error(format!(
308						"invalid f64 at position {}: {}",
309						self.position, e
310					)))
311				})?))
312			}
313			0x04 => {
314				let i = self.read_i8()?;
315				Ok(Value::Int1(i))
316			}
317			0x05 => {
318				let i = self.read_i16()?;
319				Ok(Value::Int2(i))
320			}
321			0x06 => {
322				let i = self.read_i32()?;
323				Ok(Value::Int4(i))
324			}
325			0x07 => {
326				let i = self.read_i64()?;
327				Ok(Value::Int8(i))
328			}
329			0x08 => {
330				let i = self.read_i128()?;
331				Ok(Value::Int16(i))
332			}
333			0x09 => {
334				let s = self.read_str()?;
335				Ok(Value::Utf8(s))
336			}
337			0x0a => {
338				let u = self.read_u8()?;
339				Ok(Value::Uint1(u))
340			}
341			0x0b => {
342				let u = self.read_u16()?;
343				Ok(Value::Uint2(u))
344			}
345			0x0c => {
346				let u = self.read_u32()?;
347				Ok(Value::Uint4(u))
348			}
349			0x0d => {
350				let u = self.read_u64()?;
351				Ok(Value::Uint8(u))
352			}
353			0x0e => {
354				let u = self.read_u128()?;
355				Ok(Value::Uint16(u))
356			}
357			0x0f => {
358				let d = self.read_date()?;
359				Ok(Value::Date(d))
360			}
361			0x10 => {
362				let dt = self.read_datetime()?;
363				Ok(Value::DateTime(dt))
364			}
365			0x11 => {
366				let t = self.read_time()?;
367				Ok(Value::Time(t))
368			}
369			0x12 => {
370				let i = self.read_duration()?;
371				Ok(Value::Duration(i))
372			}
373			// 0x13 was RowNumber, now reserved
374			0x13 => panic!("Type code 0x13 (RowNumber) is no longer supported"),
375			0x14 => {
376				let id = self.read_identity_id()?;
377				Ok(Value::IdentityId(id))
378			}
379			0x15 => {
380				let u = self.read_uuid4()?;
381				Ok(Value::Uuid4(u))
382			}
383			0x16 => {
384				let u = self.read_uuid7()?;
385				Ok(Value::Uuid7(u))
386			}
387			0x17 => {
388				let b = self.read_blob()?;
389				Ok(Value::Blob(b))
390			}
391			0x18 => {
392				let i = self.read_int()?;
393				Ok(Value::Int(i))
394			}
395			0x19 => {
396				let u = self.read_uint()?;
397				Ok(Value::Uint(u))
398			}
399			0x1a => {
400				let d = self.read_decimal()?;
401				Ok(Value::Decimal(d))
402			}
403			_ => Err(error!(serde_keycode_error(format!(
404				"unknown value type marker 0x{:02x} at position {}",
405				type_marker,
406				self.position - 1
407			)))),
408		}
409	}
410
411	pub fn read_raw(&mut self, count: usize) -> Result<&'a [u8]> {
412		self.read_exact(count)
413	}
414}
415
#[cfg(test)]
mod tests {
	use std::{f32::consts::PI, f64::consts::E};

	use super::*;
	use crate::util::encoding::keycode::KeySerializer;

	// Round-trip tests: encode with `KeySerializer`, decode with
	// `KeyDeserializer`, and check that the whole buffer was consumed.

	#[test]
	fn test_read_bool() {
		let mut ser = KeySerializer::new();
		ser.extend_bool(true).extend_bool(false);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert!(de.read_bool().unwrap());
		assert!(!de.read_bool().unwrap());
		assert!(de.is_empty());
	}

	#[test]
	fn test_read_integers() {
		let mut ser = KeySerializer::new();
		ser.extend_i8(-42i8).extend_i16(-1000i16).extend_i32(100000i32).extend_i64(-1000000000i64);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_i8().unwrap(), -42);
		assert_eq!(de.read_i16().unwrap(), -1000);
		assert_eq!(de.read_i32().unwrap(), 100000);
		assert_eq!(de.read_i64().unwrap(), -1000000000);
		assert!(de.is_empty());
	}

	#[test]
	fn test_read_unsigned() {
		let mut ser = KeySerializer::new();
		ser.extend_u8(255u8).extend_u16(65535u16).extend_u32(4294967295u32).extend_u64(18446744073709551615u64);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_u8().unwrap(), 255);
		assert_eq!(de.read_u16().unwrap(), 65535);
		assert_eq!(de.read_u32().unwrap(), 4294967295);
		assert_eq!(de.read_u64().unwrap(), 18446744073709551615);
		assert!(de.is_empty());
	}

	#[test]
	fn test_read_floats() {
		// Named constants are used instead of hand-typed approximations.
		let mut ser = KeySerializer::new();
		ser.extend_f32(PI).extend_f64(E);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert!((de.read_f32().unwrap() - PI).abs() < 0.001);
		assert!((de.read_f64().unwrap() - E).abs() < 0.000001);
		assert!(de.is_empty());
	}

	#[test]
	fn test_read_bytes() {
		let mut ser = KeySerializer::new();
		ser.extend_bytes(b"hello").extend_bytes(&[0x01, 0xff, 0x02]);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_bytes().unwrap(), b"hello");
		assert_eq!(de.read_bytes().unwrap(), vec![0x01, 0xff, 0x02]);
		assert!(de.is_empty());
	}

	#[test]
	fn test_read_str() {
		let mut ser = KeySerializer::new();
		ser.extend_str("hello world").extend_str("👋");
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_str().unwrap(), "hello world");
		assert_eq!(de.read_str().unwrap(), "👋");
		assert!(de.is_empty());
	}

	#[test]
	fn test_read_date() {
		use reifydb_type::Date;
		let mut ser = KeySerializer::new();
		let date = Date::from_ymd(2024, 1, 1).unwrap();
		ser.extend_date(&date);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_date().unwrap(), date);
		assert!(de.is_empty());
	}

	#[test]
	fn test_read_datetime() {
		use reifydb_type::DateTime;
		let mut ser = KeySerializer::new();
		let datetime = DateTime::from_ymd_hms(2024, 1, 1, 12, 30, 45).unwrap();
		ser.extend_datetime(&datetime);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_datetime().unwrap(), datetime);
		assert!(de.is_empty());
	}

	#[test]
	fn test_read_time() {
		use reifydb_type::Time;
		let mut ser = KeySerializer::new();
		let time = Time::from_hms(12, 30, 45).unwrap();
		ser.extend_time(&time);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_time().unwrap(), time);
		assert!(de.is_empty());
	}

	#[test]
	fn test_read_duration() {
		use reifydb_type::Duration;
		let mut ser = KeySerializer::new();
		let duration = Duration::from_nanoseconds(1000000);
		ser.extend_duration(&duration);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_duration().unwrap(), duration);
		assert!(de.is_empty());
	}

	#[test]
	fn test_read_row_number() {
		use reifydb_type::RowNumber;
		let mut ser = KeySerializer::new();
		let row = RowNumber(42);
		ser.extend_row_number(&row);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_row_number().unwrap(), row);
		assert!(de.is_empty());
	}

	#[test]
	fn test_read_source_id() {
		use crate::interface::SourceId;
		let mut ser = KeySerializer::new();
		let source = SourceId::table(42);
		ser.extend_source_id(source);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_source_id().unwrap(), source);
		assert!(de.is_empty());
	}

	#[test]
	fn test_read_index_id() {
		use crate::interface::IndexId;
		let mut ser = KeySerializer::new();
		let index = IndexId::primary(999);
		ser.extend_index_id(index);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_index_id().unwrap(), index);
		assert!(de.is_empty());
	}

	#[test]
	fn test_position_tracking() {
		let mut ser = KeySerializer::new();
		ser.extend_u8(1u8).extend_u16(2u16).extend_u32(3u32);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.position(), 0);
		assert_eq!(de.remaining(), 7);

		de.read_u8().unwrap();
		assert_eq!(de.position(), 1);
		assert_eq!(de.remaining(), 6);

		de.read_u16().unwrap();
		assert_eq!(de.position(), 3);
		assert_eq!(de.remaining(), 4);

		de.read_u32().unwrap();
		assert_eq!(de.position(), 7);
		assert_eq!(de.remaining(), 0);
		assert!(de.is_empty());
	}

	#[test]
	fn test_error_on_insufficient_bytes() {
		let bytes = vec![0x00, 0x01];
		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert!(de.read_u32().is_err());
		// A failed fixed-width read must not move the cursor.
		assert_eq!(de.position(), 0);
		assert_eq!(de.remaining(), 2);
	}

	#[test]
	fn test_chaining() {
		let mut ser = KeySerializer::new();
		ser.extend_bool(true).extend_i32(42i32).extend_str("test").extend_u64(1000u64);
		let bytes = ser.finish();

		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert!(de.read_bool().unwrap());
		assert_eq!(de.read_i32().unwrap(), 42);
		assert_eq!(de.read_str().unwrap(), "test");
		assert_eq!(de.read_u64().unwrap(), 1000);
		assert!(de.is_empty());
	}

	// Decoder-only tests: hand-written byte sequences that exercise the
	// escape handling and the error paths without going through the
	// serializer.

	#[test]
	fn test_read_bytes_terminator_only() {
		let bytes = [0xff, 0xff];
		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_bytes().unwrap(), Vec::<u8>::new());
		assert!(de.is_empty());
	}

	#[test]
	fn test_read_bytes_escaped_ff() {
		let bytes = [0x01, 0xff, 0x00, 0x02, 0xff, 0xff, 0x7f];
		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_bytes().unwrap(), vec![0x01, 0xff, 0x02]);
		// Only the terminated string is consumed; the trailing byte stays.
		assert_eq!(de.remaining(), 1);
	}

	#[test]
	fn test_read_bytes_unterminated() {
		let bytes = [0x01, 0x02];
		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert!(de.read_bytes().is_err());

		let mut empty = KeyDeserializer::from_bytes(&[]);
		assert!(empty.read_bytes().is_err());
	}

	#[test]
	fn test_read_bytes_incomplete_escape() {
		let bytes = [0x01, 0xff];
		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert!(de.read_bytes().is_err());
	}

	#[test]
	fn test_read_bytes_invalid_escape() {
		let bytes = [0xff, 0x01];
		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert!(de.read_bytes().is_err());
	}

	#[test]
	fn test_read_value_on_empty_input() {
		let mut de = KeyDeserializer::from_bytes(&[]);
		assert!(de.read_value().is_err());
	}

	#[test]
	fn test_read_value_unknown_marker() {
		let bytes = [0xfe];
		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert!(de.read_value().is_err());
	}

	#[test]
	fn test_read_raw() {
		let bytes = [0x0a, 0x0b, 0x0c];
		let mut de = KeyDeserializer::from_bytes(&bytes);
		assert_eq!(de.read_raw(2).unwrap(), &[0x0a, 0x0b]);
		assert_eq!(de.position(), 2);
		assert!(de.read_raw(2).is_err());
		assert_eq!(de.read_raw(1).unwrap(), &[0x0c]);
		assert!(de.is_empty());
	}
}