1use std::borrow::Cow;
6use std::io::Read;
7
8use rustc_hash::{FxHashMap, FxHashSet};
9
10use crate::codec::op::{decode_op, encode_op};
11use crate::codec::primitives::{Reader, Writer};
12use crate::error::{DecodeError, EncodeError};
13use crate::limits::{
14 FORMAT_VERSION, MAGIC_COMPRESSED, MAGIC_UNCOMPRESSED, MAX_AUTHORS, MAX_DICT_SIZE,
15 MAX_EDIT_SIZE, MAX_OPS_PER_EDIT, MAX_STRING_LEN, MIN_FORMAT_VERSION,
16};
17use crate::model::{DataType, DictionaryBuilder, Edit, Id, Op, WireDictionaries};
18
19pub fn decompress(input: &[u8]) -> Result<Vec<u8>, DecodeError> {
33 if input.len() < 5 {
34 return Err(DecodeError::UnexpectedEof { context: "magic" });
35 }
36 if &input[0..5] != MAGIC_COMPRESSED {
37 let mut found = [0u8; 4];
38 found.copy_from_slice(&input[0..4]);
39 return Err(DecodeError::InvalidMagic { found });
40 }
41 decompress_zstd(&input[5..])
42}
43
44pub fn decode_edit(input: &[u8]) -> Result<Edit<'_>, DecodeError> {
61 if input.len() < 4 {
62 return Err(DecodeError::UnexpectedEof { context: "magic" });
63 }
64
65 if input.len() >= 5 && &input[0..5] == MAGIC_COMPRESSED {
67 let decompressed = decompress_zstd(&input[5..])?;
70 if decompressed.len() > MAX_EDIT_SIZE {
71 return Err(DecodeError::LengthExceedsLimit {
72 field: "edit",
73 len: decompressed.len(),
74 max: MAX_EDIT_SIZE,
75 });
76 }
77 decode_edit_owned(&decompressed)
78 } else if &input[0..4] == MAGIC_UNCOMPRESSED {
79 if input.len() > MAX_EDIT_SIZE {
81 return Err(DecodeError::LengthExceedsLimit {
82 field: "edit",
83 len: input.len(),
84 max: MAX_EDIT_SIZE,
85 });
86 }
87 decode_edit_borrowed(input)
88 } else {
89 let mut found = [0u8; 4];
90 found.copy_from_slice(&input[0..4]);
91 Err(DecodeError::InvalidMagic { found })
92 }
93}
94
/// Decodes an uncompressed edit, borrowing the name string directly from
/// `input` (zero-copy path).
///
/// Caller (`decode_edit`) has already verified the 4-byte magic and the
/// `MAX_EDIT_SIZE` bound; this function still consumes the magic bytes to
/// keep the reader aligned.
fn decode_edit_borrowed(input: &[u8]) -> Result<Edit<'_>, DecodeError> {
    let mut reader = Reader::new(input);

    // Consume the magic; validation already happened in decode_edit.
    reader.read_bytes(4, "magic")?;

    // Version must fall inside the supported window.
    let version = reader.read_byte("version")?;
    if version < MIN_FORMAT_VERSION || version > FORMAT_VERSION {
        return Err(DecodeError::UnsupportedVersion { version });
    }

    // Edit header: id, display name (borrowed), authors, timestamp.
    let edit_id = reader.read_id("edit_id")?;
    let name = Cow::Borrowed(reader.read_str(MAX_STRING_LEN, "name")?);
    let authors = reader.read_id_vec(MAX_AUTHORS, "authors")?;
    let created_at = reader.read_signed_varint("created_at")?;

    // Property dictionary: (id, data_type) pairs; ids must be unique.
    let property_count = reader.read_varint("property_count")? as usize;
    if property_count > MAX_DICT_SIZE {
        return Err(DecodeError::LengthExceedsLimit {
            field: "properties",
            len: property_count,
            max: MAX_DICT_SIZE,
        });
    }
    let mut properties = Vec::with_capacity(property_count);
    let mut seen_props = FxHashSet::with_capacity_and_hasher(property_count, Default::default());
    for _ in 0..property_count {
        let id = reader.read_id("property_id")?;
        if !seen_props.insert(id) {
            return Err(DecodeError::DuplicateDictionaryEntry { dict: "properties", id });
        }
        let dt_byte = reader.read_byte("data_type")?;
        let data_type = DataType::from_u8(dt_byte)
            .ok_or(DecodeError::InvalidDataType { data_type: dt_byte })?;
        properties.push((id, data_type));
    }

    // Remaining id dictionaries, each individually duplicate-checked.
    let relation_types = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "relation_types")?;
    let languages = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "languages")?;
    let units = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "units")?;
    let objects = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "objects")?;

    let dicts = WireDictionaries {
        properties,
        relation_types,
        languages,
        units,
        objects,
    };

    // Ops follow the dictionaries and reference entries by index.
    let op_count = reader.read_varint("op_count")? as usize;
    if op_count > MAX_OPS_PER_EDIT {
        return Err(DecodeError::LengthExceedsLimit {
            field: "ops",
            len: op_count,
            max: MAX_OPS_PER_EDIT,
        });
    }

    let mut ops = Vec::with_capacity(op_count);
    for _ in 0..op_count {
        ops.push(decode_op(&mut reader, &dicts)?);
    }

    Ok(Edit {
        id: edit_id,
        name,
        authors,
        created_at,
        ops,
    })
}
172
/// Decodes an edit from a temporary buffer (the decompressed payload),
/// producing fully owned (`'static`) data — the mirror of
/// `decode_edit_borrowed` for inputs that do not outlive the call.
///
/// NOTE(review): this intentionally duplicates the borrowed path; only
/// `read_string` (owned) vs `read_str` (borrowed) and the op decoding
/// differ.
fn decode_edit_owned(data: &[u8]) -> Result<Edit<'static>, DecodeError> {
    let mut reader = Reader::new(data);

    // Consume the magic; validation already happened in decode_edit.
    reader.read_bytes(4, "magic")?;

    // Version must fall inside the supported window.
    let version = reader.read_byte("version")?;
    if version < MIN_FORMAT_VERSION || version > FORMAT_VERSION {
        return Err(DecodeError::UnsupportedVersion { version });
    }

    // Edit header: id, display name (owned copy), authors, timestamp.
    let edit_id = reader.read_id("edit_id")?;
    let name = Cow::Owned(reader.read_string(MAX_STRING_LEN, "name")?);
    let authors = reader.read_id_vec(MAX_AUTHORS, "authors")?;
    let created_at = reader.read_signed_varint("created_at")?;

    // Property dictionary: (id, data_type) pairs; ids must be unique.
    let property_count = reader.read_varint("property_count")? as usize;
    if property_count > MAX_DICT_SIZE {
        return Err(DecodeError::LengthExceedsLimit {
            field: "properties",
            len: property_count,
            max: MAX_DICT_SIZE,
        });
    }
    let mut properties = Vec::with_capacity(property_count);
    let mut seen_props = FxHashSet::with_capacity_and_hasher(property_count, Default::default());
    for _ in 0..property_count {
        let id = reader.read_id("property_id")?;
        if !seen_props.insert(id) {
            return Err(DecodeError::DuplicateDictionaryEntry { dict: "properties", id });
        }
        let dt_byte = reader.read_byte("data_type")?;
        let data_type = DataType::from_u8(dt_byte)
            .ok_or(DecodeError::InvalidDataType { data_type: dt_byte })?;
        properties.push((id, data_type));
    }

    // Remaining id dictionaries, each individually duplicate-checked.
    let relation_types = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "relation_types")?;
    let languages = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "languages")?;
    let units = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "units")?;
    let objects = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "objects")?;

    let dicts = WireDictionaries {
        properties,
        relation_types,
        languages,
        units,
        objects,
    };

    // Ops follow the dictionaries and reference entries by index.
    let op_count = reader.read_varint("op_count")? as usize;
    if op_count > MAX_OPS_PER_EDIT {
        return Err(DecodeError::LengthExceedsLimit {
            field: "ops",
            len: op_count,
            max: MAX_OPS_PER_EDIT,
        });
    }

    // Ops are deep-copied so nothing borrows the temporary buffer.
    let mut ops = Vec::with_capacity(op_count);
    for _ in 0..op_count {
        ops.push(decode_op_owned(&mut reader, &dicts)?);
    }

    Ok(Edit {
        id: edit_id,
        name,
        authors,
        created_at,
        ops,
    })
}
250
251fn decode_op_owned(reader: &mut Reader<'_>, dicts: &WireDictionaries) -> Result<Op<'static>, DecodeError> {
253 let op = decode_op(reader, dicts)?;
255 Ok(op_to_owned(op))
256}
257
258fn op_to_owned(op: Op<'_>) -> Op<'static> {
260 match op {
261 Op::CreateEntity(ce) => Op::CreateEntity(crate::model::CreateEntity {
262 id: ce.id,
263 values: ce.values.into_iter().map(pv_to_owned).collect(),
264 }),
265 Op::UpdateEntity(ue) => Op::UpdateEntity(crate::model::UpdateEntity {
266 id: ue.id,
267 set_properties: ue.set_properties.into_iter().map(pv_to_owned).collect(),
268 unset_values: ue.unset_values,
269 }),
270 Op::DeleteEntity(de) => Op::DeleteEntity(de),
271 Op::RestoreEntity(re) => Op::RestoreEntity(re),
272 Op::CreateRelation(cr) => Op::CreateRelation(crate::model::CreateRelation {
273 id: cr.id,
274 relation_type: cr.relation_type,
275 from: cr.from,
276 from_is_value_ref: cr.from_is_value_ref,
277 to: cr.to,
278 to_is_value_ref: cr.to_is_value_ref,
279 entity: cr.entity,
280 position: cr.position.map(|p| Cow::Owned(p.into_owned())),
281 from_space: cr.from_space,
282 from_version: cr.from_version,
283 to_space: cr.to_space,
284 to_version: cr.to_version,
285 }),
286 Op::UpdateRelation(ur) => Op::UpdateRelation(crate::model::UpdateRelation {
287 id: ur.id,
288 from_space: ur.from_space,
289 from_version: ur.from_version,
290 to_space: ur.to_space,
291 to_version: ur.to_version,
292 position: ur.position.map(|p| Cow::Owned(p.into_owned())),
293 unset: ur.unset,
294 }),
295 Op::DeleteRelation(dr) => Op::DeleteRelation(dr),
296 Op::RestoreRelation(rr) => Op::RestoreRelation(rr),
297 Op::CreateValueRef(cvr) => Op::CreateValueRef(cvr),
298 }
299}
300
301fn pv_to_owned(pv: crate::model::PropertyValue<'_>) -> crate::model::PropertyValue<'static> {
303 crate::model::PropertyValue {
304 property: pv.property,
305 value: value_to_owned(pv.value),
306 }
307}
308
309fn value_to_owned(v: crate::model::Value<'_>) -> crate::model::Value<'static> {
311 use crate::model::{DecimalMantissa, Value};
312 match v {
313 Value::Bool(b) => Value::Bool(b),
314 Value::Int64 { value, unit } => Value::Int64 { value, unit },
315 Value::Float64 { value, unit } => Value::Float64 { value, unit },
316 Value::Decimal { exponent, mantissa, unit } => Value::Decimal {
317 exponent,
318 mantissa: match mantissa {
319 DecimalMantissa::I64(i) => DecimalMantissa::I64(i),
320 DecimalMantissa::Big(b) => DecimalMantissa::Big(Cow::Owned(b.into_owned())),
321 },
322 unit,
323 },
324 Value::Text { value, language } => Value::Text {
325 value: Cow::Owned(value.into_owned()),
326 language,
327 },
328 Value::Bytes(b) => Value::Bytes(Cow::Owned(b.into_owned())),
329 Value::Date(s) => Value::Date(Cow::Owned(s.into_owned())),
330 Value::Time(s) => Value::Time(Cow::Owned(s.into_owned())),
331 Value::Datetime(s) => Value::Datetime(Cow::Owned(s.into_owned())),
332 Value::Schedule(s) => Value::Schedule(Cow::Owned(s.into_owned())),
333 Value::Point { lon, lat, alt } => Value::Point { lon, lat, alt },
334 Value::Embedding { sub_type, dims, data } => Value::Embedding {
335 sub_type,
336 dims,
337 data: Cow::Owned(data.into_owned()),
338 },
339 }
340}
341
342fn read_id_vec_no_duplicates(
344 reader: &mut Reader<'_>,
345 max_len: usize,
346 field: &'static str,
347) -> Result<Vec<Id>, DecodeError> {
348 let count = reader.read_varint(field)? as usize;
349 if count > max_len {
350 return Err(DecodeError::LengthExceedsLimit {
351 field,
352 len: count,
353 max: max_len,
354 });
355 }
356
357 let mut ids = Vec::with_capacity(count);
358 let mut seen = FxHashSet::with_capacity_and_hasher(count, Default::default());
359
360 for _ in 0..count {
361 let id = reader.read_id(field)?;
362 if !seen.insert(id) {
363 return Err(DecodeError::DuplicateDictionaryEntry { dict: field, id });
364 }
365 ids.push(id);
366 }
367
368 Ok(ids)
369}
370
371fn decompress_zstd(compressed: &[u8]) -> Result<Vec<u8>, DecodeError> {
372 let mut reader = Reader::new(compressed);
374 let declared_size = reader.read_varint("uncompressed_size")? as usize;
375
376 if declared_size > MAX_EDIT_SIZE {
377 return Err(DecodeError::LengthExceedsLimit {
378 field: "uncompressed_size",
379 len: declared_size,
380 max: MAX_EDIT_SIZE,
381 });
382 }
383
384 let compressed_data = reader.remaining();
385
386 let mut decoder = zstd::Decoder::new(compressed_data)
387 .map_err(|e| DecodeError::DecompressionFailed(e.to_string()))?;
388
389 let mut decompressed = Vec::with_capacity(declared_size);
390 decoder
391 .read_to_end(&mut decompressed)
392 .map_err(|e| DecodeError::DecompressionFailed(e.to_string()))?;
393
394 if decompressed.len() != declared_size {
395 return Err(DecodeError::UncompressedSizeMismatch {
396 declared: declared_size,
397 actual: decompressed.len(),
398 });
399 }
400
401 Ok(decompressed)
402}
403
/// Options controlling how an [`Edit`] is encoded.
#[derive(Debug, Clone, Copy, Default)]
pub struct EncodeOptions {
    // When true, produce canonical output: authors, dictionaries, and
    // property values are sorted, and duplicate authors/values/unsets are
    // rejected instead of being passed through. Defaults to false (fast,
    // non-canonical encoding).
    pub canonical: bool,
}
426
impl EncodeOptions {
    /// Default options: fast, non-canonical encoding.
    pub fn new() -> Self {
        Self::default()
    }

    /// Options selecting canonical (deterministic, duplicate-rejecting)
    /// encoding.
    pub fn canonical() -> Self {
        Self { canonical: true }
    }
}
438
439pub fn encode_edit(edit: &Edit) -> Result<Vec<u8>, EncodeError> {
444 encode_edit_with_options(edit, EncodeOptions::default())
445}
446
447pub fn encode_edit_with_options(edit: &Edit, options: EncodeOptions) -> Result<Vec<u8>, EncodeError> {
449 if options.canonical {
450 encode_edit_canonical(edit)
451 } else {
452 encode_edit_fast(edit)
453 }
454}
455
456fn encode_edit_fast(edit: &Edit) -> Result<Vec<u8>, EncodeError> {
458 let property_types = rustc_hash::FxHashMap::default();
460
461 let mut dict_builder = DictionaryBuilder::with_capacity(edit.ops.len());
463 let mut ops_writer = Writer::with_capacity(edit.ops.len() * 50);
464
465 for op in &edit.ops {
466 encode_op(&mut ops_writer, op, &mut dict_builder, &property_types)?;
467 }
468
469 let ops_bytes = ops_writer.into_bytes();
471 let mut writer = Writer::with_capacity(256 + ops_bytes.len());
472
473 writer.write_bytes(MAGIC_UNCOMPRESSED);
475 writer.write_byte(FORMAT_VERSION);
476
477 writer.write_id(&edit.id);
479 writer.write_string(&edit.name);
480 writer.write_id_vec(&edit.authors);
481 writer.write_signed_varint(edit.created_at);
482
483 dict_builder.write_dictionaries(&mut writer);
485
486 writer.write_varint(edit.ops.len() as u64);
488 writer.write_bytes(&ops_bytes);
489
490 Ok(writer.into_bytes())
491}
492
493fn encode_edit_canonical(edit: &Edit) -> Result<Vec<u8>, EncodeError> {
504 let property_types = rustc_hash::FxHashMap::default();
506
507 let mut dict_builder = DictionaryBuilder::with_capacity(edit.ops.len());
509 let mut temp_writer = Writer::with_capacity(edit.ops.len() * 50);
510 for op in &edit.ops {
511 encode_op(&mut temp_writer, op, &mut dict_builder, &property_types)?;
512 }
513
514 let sorted_builder = dict_builder.into_sorted();
516
517 let mut sorted_authors = edit.authors.clone();
519 sorted_authors.sort();
520 for i in 1..sorted_authors.len() {
522 if sorted_authors[i] == sorted_authors[i - 1] {
523 return Err(EncodeError::DuplicateAuthor { id: sorted_authors[i] });
524 }
525 }
526
527 let mut ops_writer = Writer::with_capacity(edit.ops.len() * 50);
529 let mut canonical_builder = sorted_builder.clone();
530 for op in &edit.ops {
531 encode_op_canonical(&mut ops_writer, op, &mut canonical_builder, &property_types)?;
532 }
533
534 let ops_bytes = ops_writer.into_bytes();
536 let mut writer = Writer::with_capacity(256 + ops_bytes.len());
537
538 writer.write_bytes(MAGIC_UNCOMPRESSED);
540 writer.write_byte(FORMAT_VERSION);
541
542 writer.write_id(&edit.id);
544 writer.write_string(&edit.name);
545 writer.write_id_vec(&sorted_authors);
546 writer.write_signed_varint(edit.created_at);
547
548 sorted_builder.write_dictionaries(&mut writer);
550
551 writer.write_varint(edit.ops.len() as u64);
553 writer.write_bytes(&ops_bytes);
554
555 Ok(writer.into_bytes())
556}
557
/// Canonically encodes one op: entity-mutating ops get their property
/// values (and unsets) sorted and duplicate-checked; every other op variant
/// falls through to the regular encoder unchanged.
///
/// NOTE(review): the tag bytes (1 = CreateEntity, 2 = UpdateEntity) and
/// field order here must stay byte-compatible with `encode_op` — confirm
/// against crate::codec::op before touching the layout.
fn encode_op_canonical(
    writer: &mut Writer,
    op: &Op<'_>,
    dict_builder: &mut DictionaryBuilder,
    property_types: &FxHashMap<Id, DataType>,
) -> Result<(), EncodeError> {
    match op {
        Op::CreateEntity(ce) => {
            // Sort values by (property index, language index); duplicates error.
            let sorted_values = sort_and_check_values(&ce.values, dict_builder)?;

            writer.write_byte(1); writer.write_id(&ce.id);
            writer.write_varint(sorted_values.len() as u64);

            for pv in &sorted_values {
                // Prefer a declared data type; otherwise infer from the value.
                let data_type = property_types.get(&pv.property)
                    .copied()
                    .unwrap_or_else(|| pv.value.data_type());
                encode_property_value_canonical(writer, pv, dict_builder, data_type)?;
            }
            Ok(())
        }
        Op::UpdateEntity(ue) => {
            let sorted_set = sort_and_check_values(&ue.set_properties, dict_builder)?;
            let sorted_unset = sort_and_check_unsets(&ue.unset_values, dict_builder)?;

            // The entity id is written as an object-dictionary index.
            writer.write_byte(2); let id_index = dict_builder.add_object(ue.id);
            writer.write_varint(id_index as u64);

            // Presence flags: bit 0 = set-properties section, bit 1 = unset section.
            let mut flags = 0u8;
            if !sorted_set.is_empty() {
                flags |= 0x01; }
            if !sorted_unset.is_empty() {
                flags |= 0x02; }
            writer.write_byte(flags);

            if !sorted_set.is_empty() {
                writer.write_varint(sorted_set.len() as u64);
                for pv in &sorted_set {
                    let data_type = property_types.get(&pv.property)
                        .copied()
                        .unwrap_or_else(|| pv.value.data_type());
                    encode_property_value_canonical(writer, pv, dict_builder, data_type)?;
                }
            }

            if !sorted_unset.is_empty() {
                use crate::model::UnsetLanguage;
                writer.write_varint(sorted_unset.len() as u64);
                for unset in &sorted_unset {
                    // Unsets carry no value, so the data type is a placeholder.
                    let prop_idx = dict_builder.add_property(unset.property, DataType::Bool);
                    writer.write_varint(prop_idx as u64);
                    // Language sentinel: 0xFFFFFFFF = all languages, 0 = English,
                    // otherwise a language-dictionary index.
                    let lang_value: u32 = match &unset.language {
                        UnsetLanguage::All => 0xFFFFFFFF,
                        UnsetLanguage::English => 0,
                        UnsetLanguage::Specific(lang_id) => {
                            dict_builder.add_language(Some(*lang_id)) as u32
                        }
                    };
                    writer.write_varint(lang_value as u64);
                }
            }
            Ok(())
        }
        // All other op variants have no canonical-specific ordering.
        _ => encode_op(writer, op, dict_builder, property_types),
    }
}
632
633fn sort_and_check_values<'a>(
635 values: &[crate::model::PropertyValue<'a>],
636 dict_builder: &DictionaryBuilder,
637) -> Result<Vec<crate::model::PropertyValue<'a>>, EncodeError> {
638 use crate::model::{PropertyValue, Value};
639
640 if values.is_empty() {
641 return Ok(Vec::new());
642 }
643
644 let mut indexed: Vec<(usize, usize, usize, &PropertyValue<'a>)> = values
646 .iter()
647 .enumerate()
648 .map(|(i, pv)| {
649 let prop_idx = dict_builder.get_property_index(&pv.property).unwrap_or(0);
650 let lang_idx = match &pv.value {
651 Value::Text { language, .. } => dict_builder.get_language_index(language.as_ref()).unwrap_or(0),
652 _ => 0,
653 };
654 (prop_idx, lang_idx, i, pv)
655 })
656 .collect();
657
658 indexed.sort_by(|a, b| (a.0, a.1).cmp(&(b.0, b.1)));
660
661 for i in 1..indexed.len() {
663 if indexed[i].0 == indexed[i - 1].0 && indexed[i].1 == indexed[i - 1].1 {
664 let pv = indexed[i].3;
665 let language = match &pv.value {
666 Value::Text { language, .. } => *language,
667 _ => None,
668 };
669 return Err(EncodeError::DuplicateValue {
670 property: pv.property,
671 language,
672 });
673 }
674 }
675
676 Ok(indexed.into_iter().map(|(_, _, _, pv)| pv.clone()).collect())
678}
679
680fn sort_and_check_unsets(
682 unsets: &[crate::model::UnsetValue],
683 dict_builder: &DictionaryBuilder,
684) -> Result<Vec<crate::model::UnsetValue>, EncodeError> {
685 use crate::model::UnsetLanguage;
686
687 if unsets.is_empty() {
688 return Ok(Vec::new());
689 }
690
691 let mut indexed: Vec<(usize, u32, usize, &crate::model::UnsetValue)> = unsets
693 .iter()
694 .enumerate()
695 .map(|(i, up)| {
696 let prop_idx = dict_builder.get_property_index(&up.property).unwrap_or(0);
697 let lang_key: u32 = match &up.language {
698 UnsetLanguage::All => 0xFFFFFFFF,
699 UnsetLanguage::English => 0,
700 UnsetLanguage::Specific(lang_id) => {
701 dict_builder.get_language_index(Some(lang_id)).unwrap_or(0) as u32
702 }
703 };
704 (prop_idx, lang_key, i, up)
705 })
706 .collect();
707
708 indexed.sort_by(|a, b| (a.0, a.1).cmp(&(b.0, b.1)));
710
711 for i in 1..indexed.len() {
713 if indexed[i].0 == indexed[i - 1].0 && indexed[i].1 == indexed[i - 1].1 {
714 let up = indexed[i].3;
715 let language = match &up.language {
716 UnsetLanguage::All => None,
717 UnsetLanguage::English => None,
718 UnsetLanguage::Specific(id) => Some(*id),
719 };
720 return Err(EncodeError::DuplicateUnset {
721 property: up.property,
722 language,
723 });
724 }
725 }
726
727 Ok(indexed.into_iter().map(|(_, _, _, up)| up.clone()).collect())
728}
729
/// Writes one property value: the property's dictionary index as a varint,
/// followed by the encoded value itself.
fn encode_property_value_canonical(
    writer: &mut Writer,
    pv: &crate::model::PropertyValue<'_>,
    dict_builder: &mut DictionaryBuilder,
    data_type: DataType,
) -> Result<(), EncodeError> {
    let prop_index = dict_builder.add_property(pv.property, data_type);
    writer.write_varint(prop_index as u64);
    crate::codec::value::encode_value(writer, &pv.value, dict_builder)?;
    Ok(())
}
742
/// Encodes `edit` like `encode_edit`, optionally printing a timing
/// breakdown of the phases to stderr.
///
/// With `profile == false` this is exactly `encode_edit`. With `profile ==
/// true` the fast encoder is inlined here so each phase can be timed; the
/// produced bytes are the same either way.
pub fn encode_edit_profiled(edit: &Edit, profile: bool) -> Result<Vec<u8>, EncodeError> {
    if !profile {
        return encode_edit(edit);
    }

    use std::time::Instant;

    let t0 = Instant::now();

    // Phase 1: setup (empty property-type map).
    let property_types = rustc_hash::FxHashMap::default();
    let t1 = Instant::now();

    // Phase 2: encode ops while building dictionaries.
    let mut dict_builder = DictionaryBuilder::with_capacity(edit.ops.len());
    let mut ops_writer = Writer::with_capacity(edit.ops.len() * 50);

    for op in &edit.ops {
        encode_op(&mut ops_writer, op, &mut dict_builder, &property_types)?;
    }
    let t2 = Instant::now();

    // Phase 3: assemble header + dictionaries + ops into the final buffer.
    let ops_bytes = ops_writer.into_bytes();
    let mut writer = Writer::with_capacity(256 + ops_bytes.len());

    writer.write_bytes(MAGIC_UNCOMPRESSED);
    writer.write_byte(FORMAT_VERSION);
    writer.write_id(&edit.id);
    writer.write_string(&edit.name);
    writer.write_id_vec(&edit.authors);
    writer.write_signed_varint(edit.created_at);
    dict_builder.write_dictionaries(&mut writer);
    writer.write_varint(edit.ops.len() as u64);
    writer.write_bytes(&ops_bytes);
    let t3 = Instant::now();

    let result = writer.into_bytes();

    let total = t3.duration_since(t0);
    eprintln!("=== Encode Profile (single-pass) ===");
    eprintln!("  setup:                    {:?} ({:.1}%)", t1.duration_since(t0), 100.0 * t1.duration_since(t0).as_secs_f64() / total.as_secs_f64());
    eprintln!("  encode_ops + build_dicts: {:?} ({:.1}%)", t2.duration_since(t1), 100.0 * t2.duration_since(t1).as_secs_f64() / total.as_secs_f64());
    eprintln!("  assemble output:          {:?} ({:.1}%)", t3.duration_since(t2), 100.0 * t3.duration_since(t2).as_secs_f64() / total.as_secs_f64());
    eprintln!("  TOTAL:                    {:?}", total);

    Ok(result)
}
792
793pub fn encode_edit_compressed(edit: &Edit, level: i32) -> Result<Vec<u8>, EncodeError> {
795 encode_edit_compressed_with_options(edit, level, EncodeOptions::default())
796}
797
798pub fn encode_edit_compressed_with_options(
800 edit: &Edit,
801 level: i32,
802 options: EncodeOptions,
803) -> Result<Vec<u8>, EncodeError> {
804 let uncompressed = encode_edit_with_options(edit, options)?;
805
806 let compressed = zstd::encode_all(uncompressed.as_slice(), level)
807 .map_err(|e| EncodeError::CompressionFailed(e.to_string()))?;
808
809 let mut writer = Writer::with_capacity(5 + 10 + compressed.len());
810 writer.write_bytes(MAGIC_COMPRESSED);
811 writer.write_varint(uncompressed.len() as u64);
812 writer.write_bytes(&compressed);
813
814 Ok(writer.into_bytes())
815}
816
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::{CreateEntity, PropertyValue, Value};

    // A minimal edit with one author and one CreateEntity op carrying a
    // single text value.
    fn make_test_edit() -> Edit<'static> {
        Edit {
            id: [1u8; 16],
            name: Cow::Owned("Test Edit".to_string()),
            authors: vec![[2u8; 16]],
            created_at: 1234567890,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [3u8; 16],
                    values: vec![PropertyValue {
                        property: [10u8; 16],
                        value: Value::Text {
                            value: Cow::Owned("Hello".to_string()),
                            language: None,
                        },
                    }],
                }),
            ],
        }
    }

    // encode (uncompressed) -> decode preserves all header fields and op count.
    #[test]
    fn test_edit_roundtrip() {
        let edit = make_test_edit();

        let encoded = encode_edit(&edit).unwrap();
        let decoded = decode_edit(&encoded).unwrap();

        assert_eq!(edit.id, decoded.id);
        assert_eq!(edit.name, decoded.name);
        assert_eq!(edit.authors, decoded.authors);
        assert_eq!(edit.created_at, decoded.created_at);
        assert_eq!(edit.ops.len(), decoded.ops.len());
    }

    // encode (zstd container) -> decode also round-trips.
    #[test]
    fn test_edit_compressed_roundtrip() {
        let edit = make_test_edit();

        let encoded = encode_edit_compressed(&edit, 3).unwrap();
        let decoded = decode_edit(&encoded).unwrap();

        assert_eq!(edit.id, decoded.id);
        assert_eq!(edit.name, decoded.name);
        assert_eq!(edit.authors, decoded.authors);
        assert_eq!(edit.created_at, decoded.created_at);
        assert_eq!(edit.ops.len(), decoded.ops.len());
    }

    // The two containers are distinguished by their leading magic bytes.
    #[test]
    fn test_compression_magic() {
        let edit = make_test_edit();

        let uncompressed = encode_edit(&edit).unwrap();
        let compressed = encode_edit_compressed(&edit, 3).unwrap();

        assert_eq!(&uncompressed[0..4], b"GRC2");
        assert_eq!(&compressed[0..5], b"GRC2Z");
    }

    // Unknown magic is rejected up front.
    #[test]
    fn test_invalid_magic() {
        let data = b"XXXX";
        let result = decode_edit(data);
        assert!(matches!(result, Err(DecodeError::InvalidMagic { .. })));
    }

    // A version byte outside [MIN_FORMAT_VERSION, FORMAT_VERSION] errors.
    #[test]
    fn test_unsupported_version() {
        let mut data = Vec::new();
        data.extend_from_slice(MAGIC_UNCOMPRESSED);
        data.push(99); data.extend_from_slice(&[0u8; 100]);

        let result = decode_edit(&data);
        assert!(matches!(result, Err(DecodeError::UnsupportedVersion { version: 99 })));
    }

    // Empty name/authors/ops all round-trip.
    #[test]
    fn test_empty_edit() {
        let edit: Edit<'static> = Edit {
            id: [0u8; 16],
            name: Cow::Borrowed(""),
            authors: vec![],
            created_at: 0,
            ops: vec![],
        };

        let encoded = encode_edit(&edit).unwrap();
        let decoded = decode_edit(&encoded).unwrap();

        assert_eq!(edit.id, decoded.id);
        assert!(decoded.name.is_empty());
        assert!(decoded.authors.is_empty());
        assert!(decoded.ops.is_empty());
    }

    // Two edits with the same values in different order should share a
    // canonical prefix (header + sorted dictionaries).
    #[test]
    fn test_canonical_encoding_deterministic() {
        let prop_a = [0x0A; 16]; let prop_b = [0x0B; 16]; let edit1: Edit<'static> = Edit {
            id: [1u8; 16],
            name: Cow::Owned("Test".to_string()),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [3u8; 16],
                    values: vec![
                        PropertyValue {
                            property: prop_a,
                            value: Value::Text {
                                value: Cow::Owned("Hello".to_string()),
                                language: None,
                            },
                        },
                        PropertyValue {
                            property: prop_b,
                            value: Value::Int64 { value: 42, unit: None },
                        },
                    ],
                }),
            ],
        };

        // Same content as edit1, values listed in the opposite order.
        let edit2: Edit<'static> = Edit {
            id: [1u8; 16],
            name: Cow::Owned("Test".to_string()),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [3u8; 16],
                    values: vec![
                        PropertyValue {
                            property: prop_b,
                            value: Value::Int64 { value: 42, unit: None },
                        },
                        PropertyValue {
                            property: prop_a,
                            value: Value::Text {
                                value: Cow::Owned("Hello".to_string()),
                                language: None,
                            },
                        },
                    ],
                }),
            ],
        };

        let fast1 = encode_edit_with_options(&edit1, EncodeOptions::new()).unwrap();
        let fast2 = encode_edit_with_options(&edit2, EncodeOptions::new()).unwrap();
        let canonical1 = encode_edit_with_options(&edit1, EncodeOptions::canonical()).unwrap();
        let canonical2 = encode_edit_with_options(&edit2, EncodeOptions::canonical()).unwrap();

        // Canonical output must still decode.
        let decoded1 = decode_edit(&canonical1).unwrap();
        let decoded2 = decode_edit(&canonical2).unwrap();
        assert_eq!(decoded1.id, edit1.id);
        assert_eq!(decoded2.id, edit2.id);

        assert_eq!(
            &canonical1[..50], &canonical2[..50],
            "Canonical encoding should produce identical dictionary bytes"
        );

        let _ = fast1;
        let _ = fast2;
    }

    // Canonical encoding still round-trips through decode.
    #[test]
    fn test_canonical_encoding_roundtrip() {
        let edit = make_test_edit();

        let encoded = encode_edit_with_options(&edit, EncodeOptions::canonical()).unwrap();
        let decoded = decode_edit(&encoded).unwrap();

        assert_eq!(edit.id, decoded.id);
        assert_eq!(edit.name, decoded.name);
        assert_eq!(edit.authors, decoded.authors);
        assert_eq!(edit.created_at, decoded.created_at);
        assert_eq!(edit.ops.len(), decoded.ops.len());
    }

    // Canonical + zstd container round-trips.
    #[test]
    fn test_canonical_encoding_compressed() {
        let edit = make_test_edit();

        let encoded = encode_edit_compressed_with_options(&edit, 3, EncodeOptions::canonical()).unwrap();
        let decoded = decode_edit(&encoded).unwrap();

        assert_eq!(edit.id, decoded.id);
        assert_eq!(edit.name, decoded.name);
    }

    // Fast mode tolerates duplicate authors; canonical mode rejects them.
    #[test]
    fn test_canonical_rejects_duplicate_authors() {
        let author1 = [1u8; 16];

        let edit: Edit<'static> = Edit {
            id: [0u8; 16],
            name: Cow::Owned("Test".to_string()),
            authors: vec![author1, author1], created_at: 0,
            ops: vec![],
        };

        let result = encode_edit_with_options(&edit, EncodeOptions::new());
        assert!(result.is_ok());

        let result = encode_edit_with_options(&edit, EncodeOptions::canonical());
        assert!(matches!(result, Err(EncodeError::DuplicateAuthor { .. })));
    }

    // Two values on the same property with the same (None) language are a
    // canonical-mode error.
    #[test]
    fn test_canonical_rejects_duplicate_values() {
        let prop = [10u8; 16];

        let edit: Edit<'static> = Edit {
            id: [0u8; 16],
            name: Cow::Owned("Test".to_string()),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [1u8; 16],
                    values: vec![
                        PropertyValue {
                            property: prop,
                            value: Value::Text {
                                value: Cow::Owned("First".to_string()),
                                language: None,
                            },
                        },
                        PropertyValue {
                            property: prop,
                            value: Value::Text {
                                value: Cow::Owned("Second".to_string()),
                                language: None,
                            },
                        },
                    ],
                }),
            ],
        };

        let result = encode_edit_with_options(&edit, EncodeOptions::canonical());
        assert!(matches!(result, Err(EncodeError::DuplicateValue { .. })));
    }

    // Same property, different languages: not a duplicate.
    #[test]
    fn test_canonical_allows_different_languages() {
        let prop = [10u8; 16];
        let lang_en = [20u8; 16];
        let lang_es = [21u8; 16];

        let edit: Edit<'static> = Edit {
            id: [0u8; 16],
            name: Cow::Owned("Test".to_string()),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [1u8; 16],
                    values: vec![
                        PropertyValue {
                            property: prop,
                            value: Value::Text {
                                value: Cow::Owned("Hello".to_string()),
                                language: Some(lang_en),
                            },
                        },
                        PropertyValue {
                            property: prop,
                            value: Value::Text {
                                value: Cow::Owned("Hola".to_string()),
                                language: Some(lang_es),
                            },
                        },
                    ],
                }),
            ],
        };

        let result = encode_edit_with_options(&edit, EncodeOptions::canonical());
        assert!(result.is_ok());
    }

    // Canonical encoding is byte-for-byte deterministic across calls.
    #[test]
    fn test_canonical_sorts_values_deterministically() {
        let prop_a = [0x0A; 16];
        let prop_b = [0x0B; 16];

        let edit: Edit<'static> = Edit {
            id: [1u8; 16],
            name: Cow::Owned("Test".to_string()),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [3u8; 16],
                    values: vec![
                        PropertyValue {
                            property: prop_b, value: Value::Int64 { value: 42, unit: None },
                        },
                        PropertyValue {
                            property: prop_a, value: Value::Text {
                                value: Cow::Owned("Hello".to_string()),
                                language: None,
                            },
                        },
                    ],
                }),
            ],
        };

        let encoded1 = encode_edit_with_options(&edit, EncodeOptions::canonical()).unwrap();
        let encoded2 = encode_edit_with_options(&edit, EncodeOptions::canonical()).unwrap();
        assert_eq!(encoded1, encoded2, "Canonical encoding should be deterministic");

        let decoded = decode_edit(&encoded1).unwrap();
        assert_eq!(decoded.ops.len(), 1);
    }
}