1use thiserror::Error;
39
40use crate::registry::FieldId;
41
/// Size in bytes of the fixed header: version (`u16`), field count (`u16`) and
/// `updated_at` (`u32`), all little-endian.
const HEADER_LEN: usize = 2 + 2 + 4;
/// Size in bytes of one offset-table entry: field id (`u16`) followed by the field's
/// offset into the data region (`u16`), both little-endian.
const OFFSET_ENTRY_LEN: usize = 2 + 2;
44
// One-byte type tags stored in the type table, one per field.

/// Tag for a null field; carries no payload.
const TYPE_NULL: u8 = 0x00;
/// Tag for boolean `false`; the value lives in the tag, so there is no payload.
const TYPE_BOOL_FALSE: u8 = 0x01;
/// Tag for boolean `true`; the value lives in the tag, so there is no payload.
const TYPE_BOOL_TRUE: u8 = 0x02;
/// Tag for an `i64` stored as 8 little-endian bytes.
const TYPE_I64: u8 = 0x03;
/// Tag for an `f64` stored as 8 little-endian bytes.
const TYPE_F64: u8 = 0x04;
/// Tag for inline bytes: a `u16` little-endian length prefix followed by the bytes.
const TYPE_STR_INLINE: u8 = 0x05;
/// Tag for a dictionary reference stored as a 4-byte little-endian `u32`.
const TYPE_STR_DICTREF: u8 = 0x06;
/// Tag for array bytes: a `u16` little-endian length prefix followed by the bytes.
const TYPE_ARRAY: u8 = 0x07;
53
/// Byte positions of the regions of a packed document, derived from its header.
///
/// The buffer is laid out as: header, offset table, type table, data region.
#[derive(Debug, Clone, Copy)]
struct Layout {
    /// Number of fields, as read from the header.
    field_count: usize,
    /// Absolute index of the first type tag (directly after the offset table).
    type_table_start: usize,
    /// Absolute index of the first data-region byte (directly after the type table).
    data_region_start: usize,
}
60
61#[derive(Debug, Clone, Copy)]
62struct OffsetEntry {
63 field_id: FieldId,
64 data_offset: u16,
65}
66
/// A decoded field value of a packed document.
#[derive(Debug, Clone, PartialEq)]
pub enum FieldValue {
    /// No value; encoded with an empty payload.
    Null,
    /// Boolean; the value is carried by the type tag, so the payload is empty.
    Bool(bool),
    /// Signed 64-bit integer, stored as 8 little-endian bytes.
    I64(i64),
    /// 64-bit float, stored as 8 little-endian bytes.
    F64(f64),
    /// Raw bytes stored inline behind a `u16` length prefix. This module does not
    /// interpret or validate their content.
    InlineBytes(Vec<u8>),
    /// Reference stored as a 4-byte little-endian `u32`. How the reference is
    /// resolved is not defined in this file (presumably a string dictionary).
    DictRef(u32),
    /// Opaque array payload stored behind a `u16` length prefix. The element
    /// encoding is not interpreted here.
    ArrayBytes(Vec<u8>),
}
85
86#[derive(Debug, Error, PartialEq, Eq)]
88pub enum PackedDocError {
89 #[error("malformed packed document: {0}")]
91 Malformed(&'static str),
92 #[error("too many fields in packed document: {0}")]
94 TooManyFields(usize),
95 #[error("duplicate field id in packed document: {0}")]
97 DuplicateFieldId(FieldId),
98 #[error("field {field_id} payload too large: {len} bytes")]
100 FieldDataTooLarge {
101 field_id: FieldId,
103 len: usize,
105 },
106 #[error("data region too large: {0} bytes")]
108 DataRegionTooLarge(usize),
109 #[error("unknown field type tag: 0x{0:02x}")]
111 UnknownTypeTag(u8),
112 #[error("invalid data for field {field_id}: {reason}")]
114 InvalidFieldData {
115 field_id: FieldId,
117 reason: &'static str,
119 },
120}
121
/// An owned packed document: a byte buffer holding a header, an offset table, a type
/// table and a data region, in that order.
#[derive(Debug, Clone, PartialEq)]
pub struct PackedDoc {
    /// The complete encoded document.
    data: Vec<u8>,
}
127
128impl PackedDoc {
129 pub fn from_bytes(data: Vec<u8>) -> Result<Self, PackedDocError> {
131 let doc = Self { data };
132 doc.validate()?;
133 Ok(doc)
134 }
135
136 #[must_use]
138 pub fn as_bytes(&self) -> &[u8] {
139 &self.data
140 }
141
142 #[must_use]
144 pub fn into_bytes(self) -> Vec<u8> {
145 self.data
146 }
147
148 #[must_use]
150 pub fn byte_size(&self) -> usize {
151 self.data.len()
152 }
153
154 pub fn version(&self) -> Result<u16, PackedDocError> {
156 let _layout = self.layout()?;
157 Ok(u16::from_le_bytes([self.data[0], self.data[1]]))
158 }
159
160 pub fn field_count(&self) -> Result<usize, PackedDocError> {
162 Ok(self.layout()?.field_count)
163 }
164
165 pub fn updated_at(&self) -> Result<u32, PackedDocError> {
167 let _layout = self.layout()?;
168 Ok(u32::from_le_bytes([
169 self.data[4],
170 self.data[5],
171 self.data[6],
172 self.data[7],
173 ]))
174 }
175
176 pub fn read_field(&self, field_id: FieldId) -> Result<Option<FieldValue>, PackedDocError> {
178 let layout = self.layout()?;
179
180 let mut lo = 0usize;
181 let mut hi = layout.field_count;
182 while lo < hi {
183 let mid = lo + (hi - lo) / 2;
184 let entry = self.offset_entry(mid, &layout)?;
185 if entry.field_id == field_id {
186 let (_, value) = self.decode_at_index(mid, &layout)?;
187 return Ok(Some(value));
188 }
189 if entry.field_id < field_id {
190 lo = mid + 1;
191 } else {
192 hi = mid;
193 }
194 }
195
196 Ok(None)
197 }
198
199 pub fn read_fields(
201 &self,
202 field_ids: &[FieldId],
203 ) -> Result<Vec<Option<FieldValue>>, PackedDocError> {
204 field_ids.iter().map(|id| self.read_field(*id)).collect()
205 }
206
207 pub fn iter_fields(&self) -> Result<PackedDocIter<'_>, PackedDocError> {
209 let layout = self.layout()?;
210 Ok(PackedDocIter {
211 doc: self,
212 layout,
213 index: 0,
214 })
215 }
216
217 fn validate(&self) -> Result<(), PackedDocError> {
218 let layout = self.layout()?;
219 let mut previous_field_id: Option<FieldId> = None;
220 let mut previous_offset: Option<u16> = None;
221
222 for index in 0..layout.field_count {
223 let entry = self.offset_entry(index, &layout)?;
224
225 if let Some(prev_id) = previous_field_id {
226 if entry.field_id <= prev_id {
227 if entry.field_id == prev_id {
228 return Err(PackedDocError::DuplicateFieldId(entry.field_id));
229 }
230 return Err(PackedDocError::Malformed(
231 "field ids must be strictly ascending",
232 ));
233 }
234 }
235
236 if let Some(prev_offset) = previous_offset {
237 if entry.data_offset < prev_offset {
238 return Err(PackedDocError::Malformed(
239 "field offsets must be monotonically increasing",
240 ));
241 }
242 }
243
244 let _ = self.decode_at_index(index, &layout)?;
245 previous_field_id = Some(entry.field_id);
246 previous_offset = Some(entry.data_offset);
247 }
248
249 Ok(())
250 }
251
252 fn layout(&self) -> Result<Layout, PackedDocError> {
253 if self.data.len() < HEADER_LEN {
254 return Err(PackedDocError::Malformed("buffer shorter than header"));
255 }
256
257 let field_count = u16::from_le_bytes([self.data[2], self.data[3]]) as usize;
258
259 let offset_table_bytes = field_count
260 .checked_mul(OFFSET_ENTRY_LEN)
261 .ok_or(PackedDocError::Malformed("offset table length overflow"))?;
262 let type_table_start = HEADER_LEN
263 .checked_add(offset_table_bytes)
264 .ok_or(PackedDocError::Malformed("type table start overflow"))?;
265 let data_region_start = type_table_start
266 .checked_add(field_count)
267 .ok_or(PackedDocError::Malformed("data region start overflow"))?;
268
269 if self.data.len() < data_region_start {
270 return Err(PackedDocError::Malformed(
271 "buffer shorter than table region",
272 ));
273 }
274
275 Ok(Layout {
276 field_count,
277 type_table_start,
278 data_region_start,
279 })
280 }
281
282 fn offset_entry(&self, index: usize, layout: &Layout) -> Result<OffsetEntry, PackedDocError> {
283 if index >= layout.field_count {
284 return Err(PackedDocError::Malformed(
285 "offset entry index out of bounds",
286 ));
287 }
288
289 let start = HEADER_LEN
290 .checked_add(
291 index
292 .checked_mul(OFFSET_ENTRY_LEN)
293 .ok_or(PackedDocError::Malformed("offset entry start overflow"))?,
294 )
295 .ok_or(PackedDocError::Malformed("offset entry start overflow"))?;
296 let end = start
297 .checked_add(OFFSET_ENTRY_LEN)
298 .ok_or(PackedDocError::Malformed("offset entry end overflow"))?;
299
300 let bytes = self
301 .data
302 .get(start..end)
303 .ok_or(PackedDocError::Malformed("offset entry out of bounds"))?;
304
305 Ok(OffsetEntry {
306 field_id: u16::from_le_bytes([bytes[0], bytes[1]]),
307 data_offset: u16::from_le_bytes([bytes[2], bytes[3]]),
308 })
309 }
310
311 fn decode_at_index(
312 &self,
313 index: usize,
314 layout: &Layout,
315 ) -> Result<(FieldId, FieldValue), PackedDocError> {
316 let entry = self.offset_entry(index, layout)?;
317 let next_offset = if index + 1 < layout.field_count {
318 self.offset_entry(index + 1, layout)?.data_offset as usize
319 } else {
320 self.data
321 .len()
322 .checked_sub(layout.data_region_start)
323 .ok_or(PackedDocError::Malformed("data region underflow"))?
324 };
325 let current_offset = entry.data_offset as usize;
326
327 if next_offset < current_offset {
328 return Err(PackedDocError::Malformed("field offsets are not monotonic"));
329 }
330
331 let data_start = layout
332 .data_region_start
333 .checked_add(current_offset)
334 .ok_or(PackedDocError::Malformed("field data start overflow"))?;
335 let data_end = layout
336 .data_region_start
337 .checked_add(next_offset)
338 .ok_or(PackedDocError::Malformed("field data end overflow"))?;
339
340 let tag = *self
341 .data
342 .get(layout.type_table_start + index)
343 .ok_or(PackedDocError::Malformed("type tag out of bounds"))?;
344 let slice = self
345 .data
346 .get(data_start..data_end)
347 .ok_or(PackedDocError::Malformed("field data out of bounds"))?;
348 let value = decode_field_value(tag, slice, entry.field_id)?;
349
350 Ok((entry.field_id, value))
351 }
352}
353
354pub struct PackedDocIter<'a> {
356 doc: &'a PackedDoc,
357 layout: Layout,
358 index: usize,
359}
360
361impl<'a> Iterator for PackedDocIter<'a> {
362 type Item = Result<(FieldId, FieldValue), PackedDocError>;
363
364 fn next(&mut self) -> Option<Self::Item> {
365 if self.index >= self.layout.field_count {
366 return None;
367 }
368
369 let result = self.doc.decode_at_index(self.index, &self.layout);
370 self.index += 1;
371 Some(result)
372 }
373}
374
375#[derive(Debug, Clone)]
376struct BuilderField {
377 field_id: FieldId,
378 tag: u8,
379 data: Vec<u8>,
380}
381
382#[derive(Debug, Default)]
384pub struct PackedDocBuilder {
385 version: u16,
386 fields: Vec<BuilderField>,
387}
388
389impl PackedDocBuilder {
390 #[must_use]
392 pub fn new(version: u16) -> Self {
393 Self {
394 version,
395 fields: Vec::new(),
396 }
397 }
398
399 pub fn add_field(
401 &mut self,
402 field_id: FieldId,
403 value: FieldValue,
404 ) -> Result<(), PackedDocError> {
405 let (tag, data) = encode_field_value(field_id, value)?;
406 self.fields.push(BuilderField {
407 field_id,
408 tag,
409 data,
410 });
411 Ok(())
412 }
413
414 pub fn build(mut self, updated_at: u32) -> Result<PackedDoc, PackedDocError> {
416 self.fields.sort_by_key(|f| f.field_id);
417
418 for window in self.fields.windows(2) {
419 if window[0].field_id == window[1].field_id {
420 return Err(PackedDocError::DuplicateFieldId(window[0].field_id));
421 }
422 }
423
424 let field_count = self.fields.len();
425 let field_count_u16 =
426 u16::try_from(field_count).map_err(|_| PackedDocError::TooManyFields(field_count))?;
427 let data_size = self.fields.iter().try_fold(0usize, |acc, field| {
428 acc.checked_add(field.data.len())
429 .ok_or(PackedDocError::DataRegionTooLarge(usize::MAX))
430 })?;
431
432 if data_size > u16::MAX as usize {
433 return Err(PackedDocError::DataRegionTooLarge(data_size));
434 }
435
436 let offset_table_size = field_count
437 .checked_mul(OFFSET_ENTRY_LEN)
438 .ok_or(PackedDocError::Malformed("offset table size overflow"))?;
439 let type_table_size = field_count;
440 let total_size = HEADER_LEN
441 .checked_add(offset_table_size)
442 .and_then(|n| n.checked_add(type_table_size))
443 .and_then(|n| n.checked_add(data_size))
444 .ok_or(PackedDocError::Malformed("packed document size overflow"))?;
445
446 let mut data = Vec::with_capacity(total_size);
447
448 data.extend_from_slice(&self.version.to_le_bytes());
449 data.extend_from_slice(&field_count_u16.to_le_bytes());
450 data.extend_from_slice(&updated_at.to_le_bytes());
451
452 let mut data_offset = 0u16;
453 for field in &self.fields {
454 data.extend_from_slice(&field.field_id.to_le_bytes());
455 data.extend_from_slice(&data_offset.to_le_bytes());
456 data_offset = data_offset
457 .checked_add(field.data.len() as u16)
458 .ok_or(PackedDocError::DataRegionTooLarge(data_size))?;
459 }
460
461 for field in &self.fields {
462 data.push(field.tag);
463 }
464
465 for field in &self.fields {
466 data.extend_from_slice(&field.data);
467 }
468
469 PackedDoc::from_bytes(data)
470 }
471}
472
473fn encode_field_value(
474 field_id: FieldId,
475 value: FieldValue,
476) -> Result<(u8, Vec<u8>), PackedDocError> {
477 match value {
478 FieldValue::Null => Ok((TYPE_NULL, Vec::new())),
479 FieldValue::Bool(false) => Ok((TYPE_BOOL_FALSE, Vec::new())),
480 FieldValue::Bool(true) => Ok((TYPE_BOOL_TRUE, Vec::new())),
481 FieldValue::I64(v) => Ok((TYPE_I64, v.to_le_bytes().to_vec())),
482 FieldValue::F64(v) => Ok((TYPE_F64, v.to_le_bytes().to_vec())),
483 FieldValue::InlineBytes(bytes) => {
484 if bytes.len() > u16::MAX as usize {
485 return Err(PackedDocError::FieldDataTooLarge {
486 field_id,
487 len: bytes.len(),
488 });
489 }
490 let mut data = Vec::with_capacity(bytes.len() + 2);
491 data.extend_from_slice(&(bytes.len() as u16).to_le_bytes());
492 data.extend_from_slice(&bytes);
493 Ok((TYPE_STR_INLINE, data))
494 }
495 FieldValue::DictRef(id) => Ok((TYPE_STR_DICTREF, id.to_le_bytes().to_vec())),
496 FieldValue::ArrayBytes(bytes) => {
497 if bytes.len() > u16::MAX as usize {
498 return Err(PackedDocError::FieldDataTooLarge {
499 field_id,
500 len: bytes.len(),
501 });
502 }
503 let mut data = Vec::with_capacity(bytes.len() + 2);
504 data.extend_from_slice(&(bytes.len() as u16).to_le_bytes());
505 data.extend_from_slice(&bytes);
506 Ok((TYPE_ARRAY, data))
507 }
508 }
509}
510
511fn decode_field_value(
512 tag: u8,
513 data: &[u8],
514 field_id: FieldId,
515) -> Result<FieldValue, PackedDocError> {
516 match tag {
517 TYPE_NULL => {
518 if !data.is_empty() {
519 return Err(PackedDocError::InvalidFieldData {
520 field_id,
521 reason: "null field must not have payload bytes",
522 });
523 }
524 Ok(FieldValue::Null)
525 }
526 TYPE_BOOL_FALSE => {
527 if !data.is_empty() {
528 return Err(PackedDocError::InvalidFieldData {
529 field_id,
530 reason: "bool field must not have payload bytes",
531 });
532 }
533 Ok(FieldValue::Bool(false))
534 }
535 TYPE_BOOL_TRUE => {
536 if !data.is_empty() {
537 return Err(PackedDocError::InvalidFieldData {
538 field_id,
539 reason: "bool field must not have payload bytes",
540 });
541 }
542 Ok(FieldValue::Bool(true))
543 }
544 TYPE_I64 => {
545 if data.len() != 8 {
546 return Err(PackedDocError::InvalidFieldData {
547 field_id,
548 reason: "i64 field payload must be exactly 8 bytes",
549 });
550 }
551 let bytes: [u8; 8] = data
552 .try_into()
553 .map_err(|_| PackedDocError::InvalidFieldData {
554 field_id,
555 reason: "i64 conversion failed",
556 })?;
557 Ok(FieldValue::I64(i64::from_le_bytes(bytes)))
558 }
559 TYPE_F64 => {
560 if data.len() != 8 {
561 return Err(PackedDocError::InvalidFieldData {
562 field_id,
563 reason: "f64 field payload must be exactly 8 bytes",
564 });
565 }
566 let bytes: [u8; 8] = data
567 .try_into()
568 .map_err(|_| PackedDocError::InvalidFieldData {
569 field_id,
570 reason: "f64 conversion failed",
571 })?;
572 Ok(FieldValue::F64(f64::from_le_bytes(bytes)))
573 }
574 TYPE_STR_INLINE => {
575 if data.len() < 2 {
576 return Err(PackedDocError::InvalidFieldData {
577 field_id,
578 reason: "inline value missing length prefix",
579 });
580 }
581 let len = u16::from_le_bytes([data[0], data[1]]) as usize;
582 if data.len() != len + 2 {
583 return Err(PackedDocError::InvalidFieldData {
584 field_id,
585 reason: "inline value length prefix mismatch",
586 });
587 }
588 Ok(FieldValue::InlineBytes(data[2..].to_vec()))
589 }
590 TYPE_STR_DICTREF => {
591 if data.len() != 4 {
592 return Err(PackedDocError::InvalidFieldData {
593 field_id,
594 reason: "dict ref payload must be 4 bytes",
595 });
596 }
597 let bytes: [u8; 4] = data
598 .try_into()
599 .map_err(|_| PackedDocError::InvalidFieldData {
600 field_id,
601 reason: "dict ref conversion failed",
602 })?;
603 Ok(FieldValue::DictRef(u32::from_le_bytes(bytes)))
604 }
605 TYPE_ARRAY => {
606 if data.len() < 2 {
607 return Err(PackedDocError::InvalidFieldData {
608 field_id,
609 reason: "array value missing length prefix",
610 });
611 }
612 let len = u16::from_le_bytes([data[0], data[1]]) as usize;
613 if data.len() != len + 2 {
614 return Err(PackedDocError::InvalidFieldData {
615 field_id,
616 reason: "array value length prefix mismatch",
617 });
618 }
619 Ok(FieldValue::ArrayBytes(data[2..].to_vec()))
620 }
621 _ => Err(PackedDocError::UnknownTypeTag(tag)),
622 }
623}
624
#[cfg(test)]
mod tests {
    use super::*;

    /// Adds `fields` to a version-1 builder in the given order and builds the document.
    fn build_doc(
        fields: Vec<(FieldId, FieldValue)>,
        updated_at: u32,
    ) -> Result<PackedDoc, PackedDocError> {
        let mut builder = PackedDocBuilder::new(1);
        for (field_id, value) in fields {
            builder
                .add_field(field_id, value)
                .expect("field should be added");
        }
        builder.build(updated_at)
    }

    /// Fields added out of order can be read back by id, along with the header values.
    #[test]
    fn build_and_read_round_trip() {
        let doc = build_doc(
            vec![
                (2, FieldValue::I64(42)),
                (1, FieldValue::InlineBytes(b"augustus".to_vec())),
                (4, FieldValue::DictRef(7)),
                (3, FieldValue::Bool(true)),
            ],
            123,
        )
        .expect("doc should build");

        assert_eq!(doc.version().expect("version"), 1);
        assert_eq!(doc.updated_at().expect("updated_at"), 123);
        assert_eq!(doc.field_count().expect("field_count"), 4);

        let expected: Vec<(FieldId, Option<FieldValue>)> = vec![
            (1, Some(FieldValue::InlineBytes(b"augustus".to_vec()))),
            (2, Some(FieldValue::I64(42))),
            (3, Some(FieldValue::Bool(true))),
            (4, Some(FieldValue::DictRef(7))),
            // An id that was never added reads back as absent.
            (9, None),
        ];
        for (field_id, want) in expected {
            assert_eq!(doc.read_field(field_id).expect("read should succeed"), want);
        }
    }

    /// Iteration yields fields ordered by id regardless of insertion order.
    #[test]
    fn iterator_returns_sorted_fields() {
        let doc = build_doc(
            vec![
                (4, FieldValue::Null),
                (2, FieldValue::Bool(false)),
                (3, FieldValue::F64(1.5)),
            ],
            0,
        )
        .expect("doc should build");

        let mut field_ids: Vec<FieldId> = Vec::new();
        for item in doc.iter_fields().expect("iterator should be created") {
            let (field_id, _) = item.expect("iteration should decode");
            field_ids.push(field_id);
        }
        assert_eq!(field_ids, vec![2, 3, 4]);
    }

    /// Adding the same id twice makes `build` fail with `DuplicateFieldId`.
    #[test]
    fn duplicate_field_ids_are_rejected() {
        let err = build_doc(
            vec![(1, FieldValue::Null), (1, FieldValue::Bool(false))],
            0,
        )
        .expect_err("duplicate fields must fail");
        assert_eq!(err, PackedDocError::DuplicateFieldId(1));
    }

    /// An inline payload longer than `u16::MAX` bytes is rejected by `add_field`.
    #[test]
    fn oversized_field_payload_is_rejected() {
        let mut builder = PackedDocBuilder::new(1);
        let err = builder
            .add_field(2, FieldValue::InlineBytes(vec![0u8; 70_000]))
            .expect_err("oversized field must fail");
        let expected = PackedDocError::FieldDataTooLarge {
            field_id: 2,
            len: 70_000,
        };
        assert_eq!(err, expected);
    }

    /// A buffer shorter than the header is rejected by `from_bytes`.
    #[test]
    fn malformed_doc_is_rejected() {
        let err = PackedDoc::from_bytes(vec![1, 2, 3]).expect_err("malformed doc should fail");
        assert_eq!(err, PackedDocError::Malformed("buffer shorter than header"));
    }
}