1use std::borrow::Cow;
6use std::io::Read;
7
8use rustc_hash::{FxHashMap, FxHashSet};
9
10use crate::codec::op::{decode_op, encode_op};
11use crate::codec::primitives::{Reader, Writer};
12use crate::error::{DecodeError, EncodeError};
13use crate::limits::{
14 FORMAT_VERSION, MAGIC_COMPRESSED, MAGIC_UNCOMPRESSED, MAX_AUTHORS, MAX_DICT_SIZE,
15 MAX_EDIT_SIZE, MAX_OPS_PER_EDIT, MAX_STRING_LEN, MIN_FORMAT_VERSION,
16};
17use crate::model::{Context, ContextEdge, DataType, DictionaryBuilder, Edit, Id, Op, WireDictionaries};
18
19pub fn decompress(input: &[u8]) -> Result<Vec<u8>, DecodeError> {
33 if input.len() < 5 {
34 return Err(DecodeError::UnexpectedEof { context: "magic" });
35 }
36 if &input[0..5] != MAGIC_COMPRESSED {
37 let mut found = [0u8; 4];
38 found.copy_from_slice(&input[0..4]);
39 return Err(DecodeError::InvalidMagic { found });
40 }
41 decompress_zstd(&input[5..])
42}
43
44pub fn decode_edit(input: &[u8]) -> Result<Edit<'_>, DecodeError> {
61 if input.len() < 4 {
62 return Err(DecodeError::UnexpectedEof { context: "magic" });
63 }
64
65 if input.len() >= 5 && &input[0..5] == MAGIC_COMPRESSED {
67 let decompressed = decompress_zstd(&input[5..])?;
70 if decompressed.len() > MAX_EDIT_SIZE {
71 return Err(DecodeError::LengthExceedsLimit {
72 field: "edit",
73 len: decompressed.len(),
74 max: MAX_EDIT_SIZE,
75 });
76 }
77 decode_edit_owned(&decompressed)
78 } else if &input[0..4] == MAGIC_UNCOMPRESSED {
79 if input.len() > MAX_EDIT_SIZE {
81 return Err(DecodeError::LengthExceedsLimit {
82 field: "edit",
83 len: input.len(),
84 max: MAX_EDIT_SIZE,
85 });
86 }
87 decode_edit_borrowed(input)
88 } else {
89 let mut found = [0u8; 4];
90 found.copy_from_slice(&input[0..4]);
91 Err(DecodeError::InvalidMagic { found })
92 }
93}
94
/// Decodes an uncompressed edit in place, borrowing strings from `input`.
///
/// Wire layout (read order is the format — do not reorder): magic, version
/// byte, edit id, name, authors, created_at, property dictionary
/// (id + data-type pairs), five id dictionaries, context table, then ops.
///
/// NOTE(review): `decode_edit_owned` is a near-duplicate of this function;
/// keep the two in sync when the wire format changes.
fn decode_edit_borrowed(input: &[u8]) -> Result<Edit<'_>, DecodeError> {
    let mut reader = Reader::new(input);

    // Magic was already validated by `decode_edit`; just consume it.
    reader.read_bytes(4, "magic")?;

    let version = reader.read_byte("version")?;
    if version < MIN_FORMAT_VERSION || version > FORMAT_VERSION {
        return Err(DecodeError::UnsupportedVersion { version });
    }

    let edit_id = reader.read_id("edit_id")?;
    // Borrowed: the name slice points directly into `input`.
    let name = Cow::Borrowed(reader.read_str(MAX_STRING_LEN, "name")?);
    let authors = reader.read_id_vec(MAX_AUTHORS, "authors")?;
    let created_at = reader.read_signed_varint("created_at")?;

    // Property dictionary: (id, data type) pairs; duplicate ids rejected.
    let property_count = reader.read_varint("property_count")? as usize;
    if property_count > MAX_DICT_SIZE {
        return Err(DecodeError::LengthExceedsLimit {
            field: "properties",
            len: property_count,
            max: MAX_DICT_SIZE,
        });
    }
    let mut properties = Vec::with_capacity(property_count);
    let mut seen_props = FxHashSet::with_capacity_and_hasher(property_count, Default::default());
    for _ in 0..property_count {
        let id = reader.read_id("property_id")?;
        if !seen_props.insert(id) {
            return Err(DecodeError::DuplicateDictionaryEntry { dict: "properties", id });
        }
        let dt_byte = reader.read_byte("data_type")?;
        let data_type = DataType::from_u8(dt_byte)
            .ok_or(DecodeError::InvalidDataType { data_type: dt_byte })?;
        properties.push((id, data_type));
    }

    let relation_types = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "relation_types")?;
    let languages = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "languages")?;
    let units = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "units")?;
    let objects = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "objects")?;
    let context_ids = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "context_ids")?;

    let mut dicts = WireDictionaries {
        properties,
        relation_types,
        languages,
        units,
        objects,
        context_ids,
        contexts: Vec::new(),
    };

    // Contexts are decoded against the dictionaries read above.
    let context_count = reader.read_varint("context_count")? as usize;
    if context_count > MAX_DICT_SIZE {
        return Err(DecodeError::LengthExceedsLimit {
            field: "contexts",
            len: context_count,
            max: MAX_DICT_SIZE,
        });
    }
    for _ in 0..context_count {
        dicts.contexts.push(decode_context(&mut reader, &dicts)?);
    }

    let op_count = reader.read_varint("op_count")? as usize;
    if op_count > MAX_OPS_PER_EDIT {
        return Err(DecodeError::LengthExceedsLimit {
            field: "ops",
            len: op_count,
            max: MAX_OPS_PER_EDIT,
        });
    }

    let mut ops = Vec::with_capacity(op_count);
    for _ in 0..op_count {
        ops.push(decode_op(&mut reader, &dicts)?);
    }

    Ok(Edit {
        id: edit_id,
        name,
        authors,
        created_at,
        ops,
    })
}
188
/// Decodes an edit from a temporary buffer (the zstd-decompressed bytes),
/// producing a fully owned `Edit<'static>` with no borrows into `data`.
///
/// Identical wire layout to `decode_edit_borrowed`; the differences are
/// `read_string`/`Cow::Owned` for the name and `decode_op_owned` for ops.
///
/// NOTE(review): near-duplicate of `decode_edit_borrowed`; keep both in
/// sync when the wire format changes.
fn decode_edit_owned(data: &[u8]) -> Result<Edit<'static>, DecodeError> {
    let mut reader = Reader::new(data);

    // Magic was validated by the caller; consume it.
    reader.read_bytes(4, "magic")?;

    let version = reader.read_byte("version")?;
    if version < MIN_FORMAT_VERSION || version > FORMAT_VERSION {
        return Err(DecodeError::UnsupportedVersion { version });
    }

    let edit_id = reader.read_id("edit_id")?;
    // Owned copy: `data` is a temporary and must not be borrowed from.
    let name = Cow::Owned(reader.read_string(MAX_STRING_LEN, "name")?);
    let authors = reader.read_id_vec(MAX_AUTHORS, "authors")?;
    let created_at = reader.read_signed_varint("created_at")?;

    // Property dictionary: (id, data type) pairs; duplicate ids rejected.
    let property_count = reader.read_varint("property_count")? as usize;
    if property_count > MAX_DICT_SIZE {
        return Err(DecodeError::LengthExceedsLimit {
            field: "properties",
            len: property_count,
            max: MAX_DICT_SIZE,
        });
    }
    let mut properties = Vec::with_capacity(property_count);
    let mut seen_props = FxHashSet::with_capacity_and_hasher(property_count, Default::default());
    for _ in 0..property_count {
        let id = reader.read_id("property_id")?;
        if !seen_props.insert(id) {
            return Err(DecodeError::DuplicateDictionaryEntry { dict: "properties", id });
        }
        let dt_byte = reader.read_byte("data_type")?;
        let data_type = DataType::from_u8(dt_byte)
            .ok_or(DecodeError::InvalidDataType { data_type: dt_byte })?;
        properties.push((id, data_type));
    }

    let relation_types = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "relation_types")?;
    let languages = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "languages")?;
    let units = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "units")?;
    let objects = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "objects")?;
    let context_ids = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "context_ids")?;

    let mut dicts = WireDictionaries {
        properties,
        relation_types,
        languages,
        units,
        objects,
        context_ids,
        contexts: Vec::new(),
    };

    // Contexts are decoded against the dictionaries read above.
    let context_count = reader.read_varint("context_count")? as usize;
    if context_count > MAX_DICT_SIZE {
        return Err(DecodeError::LengthExceedsLimit {
            field: "contexts",
            len: context_count,
            max: MAX_DICT_SIZE,
        });
    }
    for _ in 0..context_count {
        dicts.contexts.push(decode_context(&mut reader, &dicts)?);
    }

    let op_count = reader.read_varint("op_count")? as usize;
    if op_count > MAX_OPS_PER_EDIT {
        return Err(DecodeError::LengthExceedsLimit {
            field: "ops",
            len: op_count,
            max: MAX_OPS_PER_EDIT,
        });
    }

    let mut ops = Vec::with_capacity(op_count);
    for _ in 0..op_count {
        // Ops are deep-copied so nothing borrows from `data`.
        ops.push(decode_op_owned(&mut reader, &dicts)?);
    }

    Ok(Edit {
        id: edit_id,
        name,
        authors,
        created_at,
        ops,
    })
}
282
283fn decode_op_owned(reader: &mut Reader<'_>, dicts: &WireDictionaries) -> Result<Op<'static>, DecodeError> {
285 let op = decode_op(reader, dicts)?;
287 Ok(op_to_owned(op))
288}
289
290fn decode_context(reader: &mut Reader<'_>, dicts: &WireDictionaries) -> Result<Context, DecodeError> {
292 let root_id_index = reader.read_varint("root_id")? as usize;
293 if root_id_index >= dicts.context_ids.len() {
294 return Err(DecodeError::IndexOutOfBounds {
295 dict: "context_ids",
296 index: root_id_index,
297 size: dicts.context_ids.len(),
298 });
299 }
300 let root_id = dicts.context_ids[root_id_index];
301
302 let edge_count = reader.read_varint("edge_count")? as usize;
303 if edge_count > MAX_DICT_SIZE {
304 return Err(DecodeError::LengthExceedsLimit {
305 field: "context_edges",
306 len: edge_count,
307 max: MAX_DICT_SIZE,
308 });
309 }
310
311 let mut edges = Vec::with_capacity(edge_count);
312 for _ in 0..edge_count {
313 let type_id_index = reader.read_varint("edge_type_id")? as usize;
314 if type_id_index >= dicts.relation_types.len() {
315 return Err(DecodeError::IndexOutOfBounds {
316 dict: "relation_types",
317 index: type_id_index,
318 size: dicts.relation_types.len(),
319 });
320 }
321 let type_id = dicts.relation_types[type_id_index];
322
323 let to_entity_id_index = reader.read_varint("edge_to_entity_id")? as usize;
324 if to_entity_id_index >= dicts.context_ids.len() {
325 return Err(DecodeError::IndexOutOfBounds {
326 dict: "context_ids",
327 index: to_entity_id_index,
328 size: dicts.context_ids.len(),
329 });
330 }
331 let to_entity_id = dicts.context_ids[to_entity_id_index];
332
333 edges.push(ContextEdge { type_id, to_entity_id });
334 }
335
336 Ok(Context { root_id, edges })
337}
338
/// Deep-copies any borrowed data inside an op, producing an `Op<'static>`.
///
/// Variants with no borrowed payload (delete/restore/value-ref) pass
/// through unchanged; the rest are rebuilt field by field, converting each
/// `Cow` to its owned form.
fn op_to_owned(op: Op<'_>) -> Op<'static> {
    match op {
        Op::CreateEntity(ce) => Op::CreateEntity(crate::model::CreateEntity {
            id: ce.id,
            values: ce.values.into_iter().map(pv_to_owned).collect(),
            context: ce.context,
        }),
        Op::UpdateEntity(ue) => Op::UpdateEntity(crate::model::UpdateEntity {
            id: ue.id,
            set_properties: ue.set_properties.into_iter().map(pv_to_owned).collect(),
            unset_values: ue.unset_values,
            context: ue.context,
        }),
        Op::DeleteEntity(de) => Op::DeleteEntity(de),
        Op::RestoreEntity(re) => Op::RestoreEntity(re),
        Op::CreateRelation(cr) => Op::CreateRelation(crate::model::CreateRelation {
            id: cr.id,
            relation_type: cr.relation_type,
            from: cr.from,
            from_is_value_ref: cr.from_is_value_ref,
            to: cr.to,
            to_is_value_ref: cr.to_is_value_ref,
            entity: cr.entity,
            // `position` is the only Cow-backed field on relations.
            position: cr.position.map(|p| Cow::Owned(p.into_owned())),
            from_space: cr.from_space,
            from_version: cr.from_version,
            to_space: cr.to_space,
            to_version: cr.to_version,
            context: cr.context,
        }),
        Op::UpdateRelation(ur) => Op::UpdateRelation(crate::model::UpdateRelation {
            id: ur.id,
            from_space: ur.from_space,
            from_version: ur.from_version,
            to_space: ur.to_space,
            to_version: ur.to_version,
            position: ur.position.map(|p| Cow::Owned(p.into_owned())),
            unset: ur.unset,
            context: ur.context,
        }),
        Op::DeleteRelation(dr) => Op::DeleteRelation(dr),
        Op::RestoreRelation(rr) => Op::RestoreRelation(rr),
        Op::CreateValueRef(cvr) => Op::CreateValueRef(cvr),
    }
}
385
386fn pv_to_owned(pv: crate::model::PropertyValue<'_>) -> crate::model::PropertyValue<'static> {
388 crate::model::PropertyValue {
389 property: pv.property,
390 value: value_to_owned(pv.value),
391 }
392}
393
/// Deep-copies the borrowed payload of a `Value`, yielding `Value<'static>`.
///
/// Scalar variants (bool/int/float/point/rect) move through unchanged;
/// every `Cow`-backed variant is converted with `into_owned`.
fn value_to_owned(v: crate::model::Value<'_>) -> crate::model::Value<'static> {
    use crate::model::{DecimalMantissa, Value};
    match v {
        Value::Bool(b) => Value::Bool(b),
        Value::Int64 { value, unit } => Value::Int64 { value, unit },
        Value::Float64 { value, unit } => Value::Float64 { value, unit },
        Value::Decimal { exponent, mantissa, unit } => Value::Decimal {
            exponent,
            // Only the big-integer mantissa borrows; the i64 form is Copy.
            mantissa: match mantissa {
                DecimalMantissa::I64(i) => DecimalMantissa::I64(i),
                DecimalMantissa::Big(b) => DecimalMantissa::Big(Cow::Owned(b.into_owned())),
            },
            unit,
        },
        Value::Text { value, language } => Value::Text {
            value: Cow::Owned(value.into_owned()),
            language,
        },
        Value::Bytes(b) => Value::Bytes(Cow::Owned(b.into_owned())),
        Value::Date(s) => Value::Date(Cow::Owned(s.into_owned())),
        Value::Time(s) => Value::Time(Cow::Owned(s.into_owned())),
        Value::Datetime(s) => Value::Datetime(Cow::Owned(s.into_owned())),
        Value::Schedule(s) => Value::Schedule(Cow::Owned(s.into_owned())),
        Value::Point { lat, lon, alt } => Value::Point { lat, lon, alt },
        Value::Rect { min_lat, min_lon, max_lat, max_lon } => Value::Rect { min_lat, min_lon, max_lat, max_lon },
        Value::Embedding { sub_type, dims, data } => Value::Embedding {
            sub_type,
            dims,
            data: Cow::Owned(data.into_owned()),
        },
    }
}
427
428fn read_id_vec_no_duplicates(
430 reader: &mut Reader<'_>,
431 max_len: usize,
432 field: &'static str,
433) -> Result<Vec<Id>, DecodeError> {
434 let count = reader.read_varint(field)? as usize;
435 if count > max_len {
436 return Err(DecodeError::LengthExceedsLimit {
437 field,
438 len: count,
439 max: max_len,
440 });
441 }
442
443 let mut ids = Vec::with_capacity(count);
444 let mut seen = FxHashSet::with_capacity_and_hasher(count, Default::default());
445
446 for _ in 0..count {
447 let id = reader.read_id(field)?;
448 if !seen.insert(id) {
449 return Err(DecodeError::DuplicateDictionaryEntry { dict: field, id });
450 }
451 ids.push(id);
452 }
453
454 Ok(ids)
455}
456
457fn decompress_zstd(compressed: &[u8]) -> Result<Vec<u8>, DecodeError> {
458 let mut reader = Reader::new(compressed);
460 let declared_size = reader.read_varint("uncompressed_size")? as usize;
461
462 if declared_size > MAX_EDIT_SIZE {
463 return Err(DecodeError::LengthExceedsLimit {
464 field: "uncompressed_size",
465 len: declared_size,
466 max: MAX_EDIT_SIZE,
467 });
468 }
469
470 let compressed_data = reader.remaining();
471
472 let mut decoder = zstd::Decoder::new(compressed_data)
473 .map_err(|e| DecodeError::DecompressionFailed(e.to_string()))?;
474
475 let mut decompressed = Vec::with_capacity(declared_size);
476 decoder
477 .read_to_end(&mut decompressed)
478 .map_err(|e| DecodeError::DecompressionFailed(e.to_string()))?;
479
480 if decompressed.len() != declared_size {
481 return Err(DecodeError::UncompressedSizeMismatch {
482 declared: declared_size,
483 actual: decompressed.len(),
484 });
485 }
486
487 Ok(decompressed)
488}
489
/// Options controlling how an edit is serialized.
#[derive(Debug, Clone, Copy, Default)]
pub struct EncodeOptions {
    /// When true, authors, dictionaries and values are sorted so the same
    /// logical edit always serializes to identical bytes.
    pub canonical: bool,
}

impl EncodeOptions {
    /// Fast, non-canonical encoding — identical to `Default`.
    pub fn new() -> Self {
        EncodeOptions { canonical: false }
    }

    /// Deterministic, canonical encoding.
    pub fn canonical() -> Self {
        let mut options = Self::new();
        options.canonical = true;
        options
    }
}
524
525pub fn encode_edit(edit: &Edit) -> Result<Vec<u8>, EncodeError> {
530 encode_edit_with_options(edit, EncodeOptions::default())
531}
532
533pub fn encode_edit_with_options(edit: &Edit, options: EncodeOptions) -> Result<Vec<u8>, EncodeError> {
535 if options.canonical {
536 encode_edit_canonical(edit)
537 } else {
538 encode_edit_fast(edit)
539 }
540}
541
/// Single-pass, non-canonical encoder: ops are encoded into a side buffer
/// first (populating the dictionaries as a by-product), then header,
/// dictionaries, contexts and ops are assembled in wire order.
fn encode_edit_fast(edit: &Edit) -> Result<Vec<u8>, EncodeError> {
    // No pre-declared property types; each value supplies its own.
    let property_types = rustc_hash::FxHashMap::default();

    let mut dict_builder = DictionaryBuilder::with_capacity(edit.ops.len());

    // ~50 bytes per op is a sizing heuristic, not a limit.
    let mut ops_writer = Writer::with_capacity(edit.ops.len() * 50);

    for op in &edit.ops {
        encode_op(&mut ops_writer, op, &mut dict_builder, &property_types)?;
    }

    let ops_bytes = ops_writer.into_bytes();
    let mut writer = Writer::with_capacity(256 + ops_bytes.len());

    writer.write_bytes(MAGIC_UNCOMPRESSED);
    writer.write_byte(FORMAT_VERSION);

    writer.write_id(&edit.id);
    writer.write_string(&edit.name);
    writer.write_id_vec(&edit.authors);
    writer.write_signed_varint(edit.created_at);

    // Dictionaries and contexts must precede the ops that index into them.
    dict_builder.write_dictionaries(&mut writer);

    dict_builder.write_contexts(&mut writer);

    writer.write_varint(edit.ops.len() as u64);
    writer.write_bytes(&ops_bytes);

    Ok(writer.into_bytes())
}
583
/// Canonical encoder: same wire format as `encode_edit_fast`, but authors,
/// dictionaries and per-op values are sorted so that logically equal edits
/// serialize to identical bytes. Duplicate authors are rejected.
fn encode_edit_canonical(edit: &Edit) -> Result<Vec<u8>, EncodeError> {
    let property_types = rustc_hash::FxHashMap::default();

    let mut dict_builder = DictionaryBuilder::with_capacity(edit.ops.len());

    // Pass 1: encode into a throwaway buffer purely to register every id
    // the ops reference in the dictionary builder.
    let mut temp_writer = Writer::with_capacity(edit.ops.len() * 50);
    for op in &edit.ops {
        encode_op(&mut temp_writer, op, &mut dict_builder, &property_types)?;
    }

    // Sort the collected dictionaries into canonical order.
    let sorted_builder = dict_builder.into_sorted();

    // Canonical form sorts authors and requires them to be unique.
    let mut sorted_authors = edit.authors.clone();
    sorted_authors.sort();
    for i in 1..sorted_authors.len() {
        if sorted_authors[i] == sorted_authors[i - 1] {
            return Err(EncodeError::DuplicateAuthor { id: sorted_authors[i] });
        }
    }

    // Pass 2: re-encode ops against the sorted dictionaries.
    // NOTE(review): `canonical_builder` could accept new entries during
    // this pass, yet the header below writes `sorted_builder`'s tables —
    // this relies on pass 1 having already registered everything; confirm
    // against DictionaryBuilder's add_* semantics.
    let mut ops_writer = Writer::with_capacity(edit.ops.len() * 50);
    let mut canonical_builder = sorted_builder.clone();
    for op in &edit.ops {
        encode_op_canonical(&mut ops_writer, op, &mut canonical_builder, &property_types)?;
    }

    let ops_bytes = ops_writer.into_bytes();
    let mut writer = Writer::with_capacity(256 + ops_bytes.len());

    writer.write_bytes(MAGIC_UNCOMPRESSED);
    writer.write_byte(FORMAT_VERSION);

    writer.write_id(&edit.id);
    writer.write_string(&edit.name);
    writer.write_id_vec(&sorted_authors);
    writer.write_signed_varint(edit.created_at);

    sorted_builder.write_dictionaries(&mut writer);

    sorted_builder.write_contexts(&mut writer);

    writer.write_varint(edit.ops.len() as u64);
    writer.write_bytes(&ops_bytes);

    Ok(writer.into_bytes())
}
653
/// Canonical variant of `encode_op`: create/update-entity ops get their
/// values and unsets sorted (and duplicate-checked) before writing; every
/// other op kind delegates to the fast path unchanged.
fn encode_op_canonical(
    writer: &mut Writer,
    op: &Op<'_>,
    dict_builder: &mut DictionaryBuilder,
    property_types: &FxHashMap<Id, DataType>,
) -> Result<(), EncodeError> {
    match op {
        Op::CreateEntity(ce) => {
            let sorted_values = sort_and_check_values(&ce.values, dict_builder)?;

            // Op tag 1 = CreateEntity, then the raw entity id inline.
            writer.write_byte(1);
            writer.write_id(&ce.id);
            writer.write_varint(sorted_values.len() as u64);

            for pv in &sorted_values {
                // Prefer a pre-declared type; fall back to the value's own.
                let data_type = property_types.get(&pv.property)
                    .copied()
                    .unwrap_or_else(|| pv.value.data_type());
                encode_property_value_canonical(writer, pv, dict_builder, data_type)?;
            }
            // 0xFFFFFFFF is the "no context" sentinel.
            let context_ref = match &ce.context {
                Some(ctx) => dict_builder.add_context(ctx) as u32,
                None => 0xFFFFFFFF,
            };
            writer.write_varint(context_ref as u64);
            Ok(())
        }
        Op::UpdateEntity(ue) => {
            let sorted_set = sort_and_check_values(&ue.set_properties, dict_builder)?;
            let sorted_unset = sort_and_check_unsets(&ue.unset_values, dict_builder)?;

            // Op tag 2 = UpdateEntity; the target id goes through the
            // object dictionary rather than inline.
            writer.write_byte(2);
            let id_index = dict_builder.add_object(ue.id);
            writer.write_varint(id_index as u64);

            // Presence flags: bit 0 = set-properties, bit 1 = unsets.
            let mut flags = 0u8;
            if !sorted_set.is_empty() {
                flags |= 0x01;
            }
            if !sorted_unset.is_empty() {
                flags |= 0x02;
            }
            writer.write_byte(flags);

            if !sorted_set.is_empty() {
                writer.write_varint(sorted_set.len() as u64);
                for pv in &sorted_set {
                    let data_type = property_types.get(&pv.property)
                        .copied()
                        .unwrap_or_else(|| pv.value.data_type());
                    encode_property_value_canonical(writer, pv, dict_builder, data_type)?;
                }
            }

            if !sorted_unset.is_empty() {
                use crate::model::UnsetLanguage;
                writer.write_varint(sorted_unset.len() as u64);
                for unset in &sorted_unset {
                    // NOTE(review): DataType::Bool looks like a placeholder
                    // for unset entries — confirm the decoder ignores it.
                    let prop_idx = dict_builder.add_property(unset.property, DataType::Bool);
                    writer.write_varint(prop_idx as u64);
                    // 0xFFFFFFFF = all languages, 0 = default/English,
                    // otherwise a language-dictionary index.
                    let lang_value: u32 = match &unset.language {
                        UnsetLanguage::All => 0xFFFFFFFF,
                        UnsetLanguage::English => 0,
                        UnsetLanguage::Specific(lang_id) => {
                            dict_builder.add_language(Some(*lang_id)) as u32
                        }
                    };
                    writer.write_varint(lang_value as u64);
                }
            }
            let context_ref = match &ue.context {
                Some(ctx) => dict_builder.add_context(ctx) as u32,
                None => 0xFFFFFFFF,
            };
            writer.write_varint(context_ref as u64);
            Ok(())
        }
        // All other ops have no sort-sensitive payload; reuse the fast path.
        _ => encode_op(writer, op, dict_builder, property_types),
    }
}
740
741fn sort_and_check_values<'a>(
743 values: &[crate::model::PropertyValue<'a>],
744 dict_builder: &DictionaryBuilder,
745) -> Result<Vec<crate::model::PropertyValue<'a>>, EncodeError> {
746 use crate::model::{PropertyValue, Value};
747
748 if values.is_empty() {
749 return Ok(Vec::new());
750 }
751
752 let mut indexed: Vec<(usize, usize, usize, &PropertyValue<'a>)> = values
754 .iter()
755 .enumerate()
756 .map(|(i, pv)| {
757 let prop_idx = dict_builder.get_property_index(&pv.property).unwrap_or(0);
758 let lang_idx = match &pv.value {
759 Value::Text { language, .. } => dict_builder.get_language_index(language.as_ref()).unwrap_or(0),
760 _ => 0,
761 };
762 (prop_idx, lang_idx, i, pv)
763 })
764 .collect();
765
766 indexed.sort_by(|a, b| (a.0, a.1).cmp(&(b.0, b.1)));
768
769 for i in 1..indexed.len() {
771 if indexed[i].0 == indexed[i - 1].0 && indexed[i].1 == indexed[i - 1].1 {
772 let pv = indexed[i].3;
773 let language = match &pv.value {
774 Value::Text { language, .. } => *language,
775 _ => None,
776 };
777 return Err(EncodeError::DuplicateValue {
778 property: pv.property,
779 language,
780 });
781 }
782 }
783
784 Ok(indexed.into_iter().map(|(_, _, _, pv)| pv.clone()).collect())
786}
787
788fn sort_and_check_unsets(
790 unsets: &[crate::model::UnsetValue],
791 dict_builder: &DictionaryBuilder,
792) -> Result<Vec<crate::model::UnsetValue>, EncodeError> {
793 use crate::model::UnsetLanguage;
794
795 if unsets.is_empty() {
796 return Ok(Vec::new());
797 }
798
799 let mut indexed: Vec<(usize, u32, usize, &crate::model::UnsetValue)> = unsets
801 .iter()
802 .enumerate()
803 .map(|(i, up)| {
804 let prop_idx = dict_builder.get_property_index(&up.property).unwrap_or(0);
805 let lang_key: u32 = match &up.language {
806 UnsetLanguage::All => 0xFFFFFFFF,
807 UnsetLanguage::English => 0,
808 UnsetLanguage::Specific(lang_id) => {
809 dict_builder.get_language_index(Some(lang_id)).unwrap_or(0) as u32
810 }
811 };
812 (prop_idx, lang_key, i, up)
813 })
814 .collect();
815
816 indexed.sort_by(|a, b| (a.0, a.1).cmp(&(b.0, b.1)));
818
819 for i in 1..indexed.len() {
821 if indexed[i].0 == indexed[i - 1].0 && indexed[i].1 == indexed[i - 1].1 {
822 let up = indexed[i].3;
823 let language = match &up.language {
824 UnsetLanguage::All => None,
825 UnsetLanguage::English => None,
826 UnsetLanguage::Specific(id) => Some(*id),
827 };
828 return Err(EncodeError::DuplicateUnset {
829 property: up.property,
830 language,
831 });
832 }
833 }
834
835 Ok(indexed.into_iter().map(|(_, _, _, up)| up.clone()).collect())
836}
837
838fn encode_property_value_canonical(
840 writer: &mut Writer,
841 pv: &crate::model::PropertyValue<'_>,
842 dict_builder: &mut DictionaryBuilder,
843 data_type: DataType,
844) -> Result<(), EncodeError> {
845 let prop_index = dict_builder.add_property(pv.property, data_type);
846 writer.write_varint(prop_index as u64);
847 crate::codec::value::encode_value(writer, &pv.value, dict_builder)?;
848 Ok(())
849}
850
/// Same output as `encode_edit`; when `profile` is true, coarse per-stage
/// timings are printed to stderr. This deliberately duplicates the steps
/// of `encode_edit_fast` so timestamps can be taken between them — keep
/// the two in sync.
pub fn encode_edit_profiled(edit: &Edit, profile: bool) -> Result<Vec<u8>, EncodeError> {
    if !profile {
        return encode_edit(edit);
    }

    use std::time::Instant;

    let t0 = Instant::now();

    let property_types = rustc_hash::FxHashMap::default();
    let t1 = Instant::now();

    let mut dict_builder = DictionaryBuilder::with_capacity(edit.ops.len());

    let mut ops_writer = Writer::with_capacity(edit.ops.len() * 50);

    for op in &edit.ops {
        encode_op(&mut ops_writer, op, &mut dict_builder, &property_types)?;
    }
    let t2 = Instant::now();

    let ops_bytes = ops_writer.into_bytes();
    let mut writer = Writer::with_capacity(256 + ops_bytes.len());

    writer.write_bytes(MAGIC_UNCOMPRESSED);
    writer.write_byte(FORMAT_VERSION);
    writer.write_id(&edit.id);
    writer.write_string(&edit.name);
    writer.write_id_vec(&edit.authors);
    writer.write_signed_varint(edit.created_at);
    dict_builder.write_dictionaries(&mut writer);
    dict_builder.write_contexts(&mut writer);
    writer.write_varint(edit.ops.len() as u64);
    writer.write_bytes(&ops_bytes);
    let t3 = Instant::now();

    let result = writer.into_bytes();

    // Diagnostic only; percentages may be NaN for a ~zero-duration total.
    let total = t3.duration_since(t0);
    eprintln!("=== Encode Profile (single-pass) ===");
    eprintln!(" setup: {:?} ({:.1}%)", t1.duration_since(t0), 100.0 * t1.duration_since(t0).as_secs_f64() / total.as_secs_f64());
    eprintln!(" encode_ops + build_dicts: {:?} ({:.1}%)", t2.duration_since(t1), 100.0 * t2.duration_since(t1).as_secs_f64() / total.as_secs_f64());
    eprintln!(" assemble output: {:?} ({:.1}%)", t3.duration_since(t2), 100.0 * t3.duration_since(t2).as_secs_f64() / total.as_secs_f64());
    eprintln!(" TOTAL: {:?}", total);

    Ok(result)
}
903
904pub fn encode_edit_compressed(edit: &Edit, level: i32) -> Result<Vec<u8>, EncodeError> {
906 encode_edit_compressed_with_options(edit, level, EncodeOptions::default())
907}
908
909pub fn encode_edit_compressed_with_options(
911 edit: &Edit,
912 level: i32,
913 options: EncodeOptions,
914) -> Result<Vec<u8>, EncodeError> {
915 let uncompressed = encode_edit_with_options(edit, options)?;
916
917 let compressed = zstd::encode_all(uncompressed.as_slice(), level)
918 .map_err(|e| EncodeError::CompressionFailed(e.to_string()))?;
919
920 let mut writer = Writer::with_capacity(5 + 10 + compressed.len());
921 writer.write_bytes(MAGIC_COMPRESSED);
922 writer.write_varint(uncompressed.len() as u64);
923 writer.write_bytes(&compressed);
924
925 Ok(writer.into_bytes())
926}
927
#[cfg(test)]
mod tests {
    //! Round-trip, container-format and canonical-encoding tests.
    use super::*;
    use crate::model::{CreateEntity, PropertyValue, Value};

    /// One-op edit with a single text property, shared by most round-trips.
    fn make_test_edit() -> Edit<'static> {
        Edit {
            id: [1u8; 16],
            name: Cow::Owned("Test Edit".to_string()),
            authors: vec![[2u8; 16]],
            created_at: 1234567890,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [3u8; 16],
                    values: vec![PropertyValue {
                        property: [10u8; 16],
                        value: Value::Text {
                            value: Cow::Owned("Hello".to_string()),
                            language: None,
                        },
                    }],
                    context: None,
                }),
            ],
        }
    }

    // Uncompressed encode/decode preserves header fields and op count.
    #[test]
    fn test_edit_roundtrip() {
        let edit = make_test_edit();

        let encoded = encode_edit(&edit).unwrap();
        let decoded = decode_edit(&encoded).unwrap();

        assert_eq!(edit.id, decoded.id);
        assert_eq!(edit.name, decoded.name);
        assert_eq!(edit.authors, decoded.authors);
        assert_eq!(edit.created_at, decoded.created_at);
        assert_eq!(edit.ops.len(), decoded.ops.len());
    }

    // Same round-trip through the zstd-compressed container.
    #[test]
    fn test_edit_compressed_roundtrip() {
        let edit = make_test_edit();

        let encoded = encode_edit_compressed(&edit, 3).unwrap();
        let decoded = decode_edit(&encoded).unwrap();

        assert_eq!(edit.id, decoded.id);
        assert_eq!(edit.name, decoded.name);
        assert_eq!(edit.authors, decoded.authors);
        assert_eq!(edit.created_at, decoded.created_at);
        assert_eq!(edit.ops.len(), decoded.ops.len());
    }

    // The two containers are distinguished by their magic prefixes.
    #[test]
    fn test_compression_magic() {
        let edit = make_test_edit();

        let uncompressed = encode_edit(&edit).unwrap();
        let compressed = encode_edit_compressed(&edit, 3).unwrap();

        assert_eq!(&uncompressed[0..4], b"GRC2");
        assert_eq!(&compressed[0..5], b"GRC2Z");
    }

    #[test]
    fn test_invalid_magic() {
        let data = b"XXXX";
        let result = decode_edit(data);
        assert!(matches!(result, Err(DecodeError::InvalidMagic { .. })));
    }

    // A version byte outside [MIN_FORMAT_VERSION, FORMAT_VERSION] is rejected.
    #[test]
    fn test_unsupported_version() {
        let mut data = Vec::new();
        data.extend_from_slice(MAGIC_UNCOMPRESSED);
        data.push(99);
        data.extend_from_slice(&[0u8; 100]);

        let result = decode_edit(&data);
        assert!(matches!(result, Err(DecodeError::UnsupportedVersion { version: 99 })));
    }

    // An edit with no name, authors or ops still round-trips.
    #[test]
    fn test_empty_edit() {
        let edit: Edit<'static> = Edit {
            id: [0u8; 16],
            name: Cow::Borrowed(""),
            authors: vec![],
            created_at: 0,
            ops: vec![],
        };

        let encoded = encode_edit(&edit).unwrap();
        let decoded = decode_edit(&encoded).unwrap();

        assert_eq!(edit.id, decoded.id);
        assert!(decoded.name.is_empty());
        assert!(decoded.authors.is_empty());
        assert!(decoded.ops.is_empty());
    }

    // Two edits with the same values in different order should produce the
    // same canonical prefix (header + dictionaries).
    #[test]
    fn test_canonical_encoding_deterministic() {
        let prop_a = [0x0A; 16];
        let prop_b = [0x0B; 16];
        let edit1: Edit<'static> = Edit {
            id: [1u8; 16],
            name: Cow::Owned("Test".to_string()),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [3u8; 16],
                    values: vec![
                        PropertyValue {
                            property: prop_a,
                            value: Value::Text {
                                value: Cow::Owned("Hello".to_string()),
                                language: None,
                            },
                        },
                        PropertyValue {
                            property: prop_b,
                            value: Value::Int64 { value: 42, unit: None },
                        },
                    ],
                    context: None,
                }),
            ],
        };

        // Same logical edit, values listed in the opposite order.
        let edit2: Edit<'static> = Edit {
            id: [1u8; 16],
            name: Cow::Owned("Test".to_string()),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [3u8; 16],
                    values: vec![
                        PropertyValue {
                            property: prop_b,
                            value: Value::Int64 { value: 42, unit: None },
                        },
                        PropertyValue {
                            property: prop_a,
                            value: Value::Text {
                                value: Cow::Owned("Hello".to_string()),
                                language: None,
                            },
                        },
                    ],
                    context: None,
                }),
            ],
        };

        let fast1 = encode_edit_with_options(&edit1, EncodeOptions::new()).unwrap();
        let fast2 = encode_edit_with_options(&edit2, EncodeOptions::new()).unwrap();
        let canonical1 = encode_edit_with_options(&edit1, EncodeOptions::canonical()).unwrap();
        let canonical2 = encode_edit_with_options(&edit2, EncodeOptions::canonical()).unwrap();

        let decoded1 = decode_edit(&canonical1).unwrap();
        let decoded2 = decode_edit(&canonical2).unwrap();
        assert_eq!(decoded1.id, edit1.id);
        assert_eq!(decoded2.id, edit2.id);

        // Compare only a prefix: header plus dictionary section.
        assert_eq!(
            &canonical1[..50], &canonical2[..50],
            "Canonical encoding should produce identical dictionary bytes"
        );

        let _ = fast1;
        let _ = fast2;
    }

    #[test]
    fn test_canonical_encoding_roundtrip() {
        let edit = make_test_edit();

        let encoded = encode_edit_with_options(&edit, EncodeOptions::canonical()).unwrap();
        let decoded = decode_edit(&encoded).unwrap();

        assert_eq!(edit.id, decoded.id);
        assert_eq!(edit.name, decoded.name);
        assert_eq!(edit.authors, decoded.authors);
        assert_eq!(edit.created_at, decoded.created_at);
        assert_eq!(edit.ops.len(), decoded.ops.len());
    }

    #[test]
    fn test_canonical_encoding_compressed() {
        let edit = make_test_edit();

        let encoded = encode_edit_compressed_with_options(&edit, 3, EncodeOptions::canonical()).unwrap();
        let decoded = decode_edit(&encoded).unwrap();

        assert_eq!(edit.id, decoded.id);
        assert_eq!(edit.name, decoded.name);
    }

    // Duplicate authors pass the fast path but fail canonical encoding.
    #[test]
    fn test_canonical_rejects_duplicate_authors() {
        let author1 = [1u8; 16];

        let edit: Edit<'static> = Edit {
            id: [0u8; 16],
            name: Cow::Owned("Test".to_string()),
            authors: vec![author1, author1],
            created_at: 0,
            ops: vec![],
        };

        let result = encode_edit_with_options(&edit, EncodeOptions::new());
        assert!(result.is_ok());

        let result = encode_edit_with_options(&edit, EncodeOptions::canonical());
        assert!(matches!(result, Err(EncodeError::DuplicateAuthor { .. })));
    }

    // Same property + same (absent) language twice is a duplicate value.
    #[test]
    fn test_canonical_rejects_duplicate_values() {
        let prop = [10u8; 16];

        let edit: Edit<'static> = Edit {
            id: [0u8; 16],
            name: Cow::Owned("Test".to_string()),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [1u8; 16],
                    values: vec![
                        PropertyValue {
                            property: prop,
                            value: Value::Text {
                                value: Cow::Owned("First".to_string()),
                                language: None,
                            },
                        },
                        PropertyValue {
                            property: prop,
                            value: Value::Text {
                                value: Cow::Owned("Second".to_string()),
                                language: None,
                            },
                        },
                    ],
                    context: None,
                }),
            ],
        };

        let result = encode_edit_with_options(&edit, EncodeOptions::canonical());
        assert!(matches!(result, Err(EncodeError::DuplicateValue { .. })));
    }

    // Same property is fine when the languages differ.
    #[test]
    fn test_canonical_allows_different_languages() {
        let prop = [10u8; 16];
        let lang_en = [20u8; 16];
        let lang_es = [21u8; 16];

        let edit: Edit<'static> = Edit {
            id: [0u8; 16],
            name: Cow::Owned("Test".to_string()),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [1u8; 16],
                    values: vec![
                        PropertyValue {
                            property: prop,
                            value: Value::Text {
                                value: Cow::Owned("Hello".to_string()),
                                language: Some(lang_en),
                            },
                        },
                        PropertyValue {
                            property: prop,
                            value: Value::Text {
                                value: Cow::Owned("Hola".to_string()),
                                language: Some(lang_es),
                            },
                        },
                    ],
                    context: None,
                }),
            ],
        };

        let result = encode_edit_with_options(&edit, EncodeOptions::canonical());
        assert!(result.is_ok());
    }

    // Canonical output is byte-stable across repeated encodings.
    #[test]
    fn test_canonical_sorts_values_deterministically() {
        let prop_a = [0x0A; 16];
        let prop_b = [0x0B; 16];

        let edit: Edit<'static> = Edit {
            id: [1u8; 16],
            name: Cow::Owned("Test".to_string()),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [3u8; 16],
                    values: vec![
                        PropertyValue {
                            property: prop_b,
                            value: Value::Int64 { value: 42, unit: None },
                        },
                        PropertyValue {
                            property: prop_a,
                            value: Value::Text {
                                value: Cow::Owned("Hello".to_string()),
                                language: None,
                            },
                        },
                    ],
                    context: None,
                }),
            ],
        };

        let encoded1 = encode_edit_with_options(&edit, EncodeOptions::canonical()).unwrap();
        let encoded2 = encode_edit_with_options(&edit, EncodeOptions::canonical()).unwrap();
        assert_eq!(encoded1, encoded2, "Canonical encoding should be deterministic");

        let decoded = decode_edit(&encoded1).unwrap();
        assert_eq!(decoded.ops.len(), 1);
    }
}
1290}