1use std::borrow::Cow;
6use std::io::Read;
7
8use rustc_hash::{FxHashMap, FxHashSet};
9
10use crate::codec::op::{decode_op, encode_op};
11use crate::codec::primitives::{Reader, Writer};
12use crate::error::{DecodeError, EncodeError};
13use crate::limits::{
14 FORMAT_VERSION, MAGIC_COMPRESSED, MAGIC_UNCOMPRESSED, MAX_AUTHORS, MAX_DICT_SIZE,
15 MAX_EDIT_SIZE, MAX_OPS_PER_EDIT, MAX_STRING_LEN, MIN_FORMAT_VERSION,
16};
17use crate::model::{Context, ContextEdge, DataType, DictionaryBuilder, Edit, Id, Op, WireDictionaries};
18
19pub fn decompress(input: &[u8]) -> Result<Vec<u8>, DecodeError> {
33 if input.len() < 5 {
34 return Err(DecodeError::UnexpectedEof { context: "magic" });
35 }
36 if &input[0..5] != MAGIC_COMPRESSED {
37 let mut found = [0u8; 4];
38 found.copy_from_slice(&input[0..4]);
39 return Err(DecodeError::InvalidMagic { found });
40 }
41 decompress_zstd(&input[5..])
42}
43
44pub fn decode_edit(input: &[u8]) -> Result<Edit<'_>, DecodeError> {
61 if input.len() < 4 {
62 return Err(DecodeError::UnexpectedEof { context: "magic" });
63 }
64
65 if input.len() >= 5 && &input[0..5] == MAGIC_COMPRESSED {
67 let decompressed = decompress_zstd(&input[5..])?;
70 if decompressed.len() > MAX_EDIT_SIZE {
71 return Err(DecodeError::LengthExceedsLimit {
72 field: "edit",
73 len: decompressed.len(),
74 max: MAX_EDIT_SIZE,
75 });
76 }
77 decode_edit_owned(&decompressed)
78 } else if &input[0..4] == MAGIC_UNCOMPRESSED {
79 if input.len() > MAX_EDIT_SIZE {
81 return Err(DecodeError::LengthExceedsLimit {
82 field: "edit",
83 len: input.len(),
84 max: MAX_EDIT_SIZE,
85 });
86 }
87 decode_edit_borrowed(input)
88 } else {
89 let mut found = [0u8; 4];
90 found.copy_from_slice(&input[0..4]);
91 Err(DecodeError::InvalidMagic { found })
92 }
93}
94
/// Decode an uncompressed edit frame zero-copy, borrowing strings directly
/// from `input`. The caller (`decode_edit`) has already validated the magic
/// and the overall size limit.
///
/// Wire order: magic, version, edit id, name, authors, created_at,
/// properties dictionary (id + data type), five plain id dictionaries,
/// contexts, then the ops.
fn decode_edit_borrowed(input: &[u8]) -> Result<Edit<'_>, DecodeError> {
    let mut reader = Reader::new(input);

    // Skip the 4-byte magic; it was validated upstream.
    reader.read_bytes(4, "magic")?;

    // Reject frames outside the supported version window.
    let version = reader.read_byte("version")?;
    if version < MIN_FORMAT_VERSION || version > FORMAT_VERSION {
        return Err(DecodeError::UnsupportedVersion { version });
    }

    let edit_id = reader.read_id("edit_id")?;
    // Zero-copy: the name is a slice of `input`.
    let name = Cow::Borrowed(reader.read_str(MAX_STRING_LEN, "name")?);
    let authors = reader.read_id_vec(MAX_AUTHORS, "authors")?;
    let created_at = reader.read_signed_varint("created_at")?;

    // Properties dictionary: (id, data type) pairs; duplicate ids are rejected.
    let property_count = reader.read_varint("property_count")? as usize;
    if property_count > MAX_DICT_SIZE {
        return Err(DecodeError::LengthExceedsLimit {
            field: "properties",
            len: property_count,
            max: MAX_DICT_SIZE,
        });
    }
    let mut properties = Vec::with_capacity(property_count);
    let mut seen_props = FxHashSet::with_capacity_and_hasher(property_count, Default::default());
    for _ in 0..property_count {
        let id = reader.read_id("property_id")?;
        if !seen_props.insert(id) {
            return Err(DecodeError::DuplicateDictionaryEntry { dict: "properties", id });
        }
        let dt_byte = reader.read_byte("data_type")?;
        let data_type = DataType::from_u8(dt_byte)
            .ok_or(DecodeError::InvalidDataType { data_type: dt_byte })?;
        properties.push((id, data_type));
    }

    // Remaining dictionaries are plain id lists (no attached data types).
    let relation_types = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "relation_types")?;
    let languages = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "languages")?;
    let units = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "units")?;
    let objects = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "objects")?;
    let context_ids = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "context_ids")?;

    let mut dicts = WireDictionaries {
        properties,
        relation_types,
        languages,
        units,
        objects,
        context_ids,
        contexts: Vec::new(),
    };

    // Contexts reference the dictionaries decoded above by index.
    let context_count = reader.read_varint("context_count")? as usize;
    if context_count > MAX_DICT_SIZE {
        return Err(DecodeError::LengthExceedsLimit {
            field: "contexts",
            len: context_count,
            max: MAX_DICT_SIZE,
        });
    }
    for _ in 0..context_count {
        dicts.contexts.push(decode_context(&mut reader, &dicts)?);
    }

    // Finally the ops, each resolved against the complete dictionary set.
    let op_count = reader.read_varint("op_count")? as usize;
    if op_count > MAX_OPS_PER_EDIT {
        return Err(DecodeError::LengthExceedsLimit {
            field: "ops",
            len: op_count,
            max: MAX_OPS_PER_EDIT,
        });
    }

    let mut ops = Vec::with_capacity(op_count);
    for _ in 0..op_count {
        ops.push(decode_op(&mut reader, &dicts)?);
    }

    Ok(Edit {
        id: edit_id,
        name,
        authors,
        created_at,
        ops,
    })
}
188
/// Decode an edit frame into fully owned (`'static`) data. Used after
/// decompression, where the buffer is temporary and nothing may borrow it.
///
/// NOTE(review): this is a near line-for-line duplicate of
/// `decode_edit_borrowed` (only `read_string`/`Cow::Owned` and
/// `decode_op_owned` differ) — keep the two in sync when editing either.
fn decode_edit_owned(data: &[u8]) -> Result<Edit<'static>, DecodeError> {
    let mut reader = Reader::new(data);

    // Skip the 4-byte magic; validated upstream.
    reader.read_bytes(4, "magic")?;

    // Reject frames outside the supported version window.
    let version = reader.read_byte("version")?;
    if version < MIN_FORMAT_VERSION || version > FORMAT_VERSION {
        return Err(DecodeError::UnsupportedVersion { version });
    }

    let edit_id = reader.read_id("edit_id")?;
    // Owned copy of the name — the backing buffer will not outlive this call.
    let name = Cow::Owned(reader.read_string(MAX_STRING_LEN, "name")?);
    let authors = reader.read_id_vec(MAX_AUTHORS, "authors")?;
    let created_at = reader.read_signed_varint("created_at")?;

    // Properties dictionary: (id, data type) pairs; duplicate ids are rejected.
    let property_count = reader.read_varint("property_count")? as usize;
    if property_count > MAX_DICT_SIZE {
        return Err(DecodeError::LengthExceedsLimit {
            field: "properties",
            len: property_count,
            max: MAX_DICT_SIZE,
        });
    }
    let mut properties = Vec::with_capacity(property_count);
    let mut seen_props = FxHashSet::with_capacity_and_hasher(property_count, Default::default());
    for _ in 0..property_count {
        let id = reader.read_id("property_id")?;
        if !seen_props.insert(id) {
            return Err(DecodeError::DuplicateDictionaryEntry { dict: "properties", id });
        }
        let dt_byte = reader.read_byte("data_type")?;
        let data_type = DataType::from_u8(dt_byte)
            .ok_or(DecodeError::InvalidDataType { data_type: dt_byte })?;
        properties.push((id, data_type));
    }

    // Remaining dictionaries are plain id lists (no attached data types).
    let relation_types = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "relation_types")?;
    let languages = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "languages")?;
    let units = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "units")?;
    let objects = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "objects")?;
    let context_ids = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "context_ids")?;

    let mut dicts = WireDictionaries {
        properties,
        relation_types,
        languages,
        units,
        objects,
        context_ids,
        contexts: Vec::new(),
    };

    // Contexts reference the dictionaries decoded above by index.
    let context_count = reader.read_varint("context_count")? as usize;
    if context_count > MAX_DICT_SIZE {
        return Err(DecodeError::LengthExceedsLimit {
            field: "contexts",
            len: context_count,
            max: MAX_DICT_SIZE,
        });
    }
    for _ in 0..context_count {
        dicts.contexts.push(decode_context(&mut reader, &dicts)?);
    }

    // Ops, converted to owned form so the result is `'static`.
    let op_count = reader.read_varint("op_count")? as usize;
    if op_count > MAX_OPS_PER_EDIT {
        return Err(DecodeError::LengthExceedsLimit {
            field: "ops",
            len: op_count,
            max: MAX_OPS_PER_EDIT,
        });
    }

    let mut ops = Vec::with_capacity(op_count);
    for _ in 0..op_count {
        ops.push(decode_op_owned(&mut reader, &dicts)?);
    }

    Ok(Edit {
        id: edit_id,
        name,
        authors,
        created_at,
        ops,
    })
}
282
283fn decode_op_owned(reader: &mut Reader<'_>, dicts: &WireDictionaries) -> Result<Op<'static>, DecodeError> {
285 let op = decode_op(reader, dicts)?;
287 Ok(op_to_owned(op))
288}
289
290fn decode_context(reader: &mut Reader<'_>, dicts: &WireDictionaries) -> Result<Context, DecodeError> {
292 let root_id_index = reader.read_varint("root_id")? as usize;
293 if root_id_index >= dicts.context_ids.len() {
294 return Err(DecodeError::IndexOutOfBounds {
295 dict: "context_ids",
296 index: root_id_index,
297 size: dicts.context_ids.len(),
298 });
299 }
300 let root_id = dicts.context_ids[root_id_index];
301
302 let edge_count = reader.read_varint("edge_count")? as usize;
303 if edge_count > MAX_DICT_SIZE {
304 return Err(DecodeError::LengthExceedsLimit {
305 field: "context_edges",
306 len: edge_count,
307 max: MAX_DICT_SIZE,
308 });
309 }
310
311 let mut edges = Vec::with_capacity(edge_count);
312 for _ in 0..edge_count {
313 let type_id_index = reader.read_varint("edge_type_id")? as usize;
314 if type_id_index >= dicts.relation_types.len() {
315 return Err(DecodeError::IndexOutOfBounds {
316 dict: "relation_types",
317 index: type_id_index,
318 size: dicts.relation_types.len(),
319 });
320 }
321 let type_id = dicts.relation_types[type_id_index];
322
323 let to_entity_id_index = reader.read_varint("edge_to_entity_id")? as usize;
324 if to_entity_id_index >= dicts.context_ids.len() {
325 return Err(DecodeError::IndexOutOfBounds {
326 dict: "context_ids",
327 index: to_entity_id_index,
328 size: dicts.context_ids.len(),
329 });
330 }
331 let to_entity_id = dicts.context_ids[to_entity_id_index];
332
333 edges.push(ContextEdge { type_id, to_entity_id });
334 }
335
336 Ok(Context { root_id, edges })
337}
338
/// Rebuild an op with `'static` lifetime by converting every borrowed `Cow`
/// payload (property values, positions) into its owned form. Variants that
/// carry no borrowed data are passed through unchanged.
fn op_to_owned(op: Op<'_>) -> Op<'static> {
    match op {
        // Values may hold borrowed text/bytes — deep-copy each one.
        Op::CreateEntity(ce) => Op::CreateEntity(crate::model::CreateEntity {
            id: ce.id,
            values: ce.values.into_iter().map(pv_to_owned).collect(),
            context: ce.context,
        }),
        Op::UpdateEntity(ue) => Op::UpdateEntity(crate::model::UpdateEntity {
            id: ue.id,
            set_properties: ue.set_properties.into_iter().map(pv_to_owned).collect(),
            unset_values: ue.unset_values,
            context: ue.context,
        }),
        // No borrowed payload in these variants.
        Op::DeleteEntity(de) => Op::DeleteEntity(de),
        Op::RestoreEntity(re) => Op::RestoreEntity(re),
        // Only `position` borrows; all other fields are plain data.
        Op::CreateRelation(cr) => Op::CreateRelation(crate::model::CreateRelation {
            id: cr.id,
            relation_type: cr.relation_type,
            from: cr.from,
            from_is_value_ref: cr.from_is_value_ref,
            to: cr.to,
            to_is_value_ref: cr.to_is_value_ref,
            entity: cr.entity,
            position: cr.position.map(|p| Cow::Owned(p.into_owned())),
            from_space: cr.from_space,
            from_version: cr.from_version,
            to_space: cr.to_space,
            to_version: cr.to_version,
            context: cr.context,
        }),
        Op::UpdateRelation(ur) => Op::UpdateRelation(crate::model::UpdateRelation {
            id: ur.id,
            from_space: ur.from_space,
            from_version: ur.from_version,
            to_space: ur.to_space,
            to_version: ur.to_version,
            position: ur.position.map(|p| Cow::Owned(p.into_owned())),
            unset: ur.unset,
            context: ur.context,
        }),
        // No borrowed payload in these variants.
        Op::DeleteRelation(dr) => Op::DeleteRelation(dr),
        Op::RestoreRelation(rr) => Op::RestoreRelation(rr),
        Op::CreateValueRef(cvr) => Op::CreateValueRef(cvr),
    }
}
385
386fn pv_to_owned(pv: crate::model::PropertyValue<'_>) -> crate::model::PropertyValue<'static> {
388 crate::model::PropertyValue {
389 property: pv.property,
390 value: value_to_owned(pv.value),
391 }
392}
393
/// Deep-copy a decoded value so it no longer borrows from the input buffer.
/// Only the variants carrying `Cow` payloads (big decimal mantissa, text,
/// bytes, schedule, embedding data) allocate; the rest are plain copies.
fn value_to_owned(v: crate::model::Value<'_>) -> crate::model::Value<'static> {
    use crate::model::{DecimalMantissa, Value};
    match v {
        Value::Bool(b) => Value::Bool(b),
        Value::Int64 { value, unit } => Value::Int64 { value, unit },
        Value::Float64 { value, unit } => Value::Float64 { value, unit },
        // Inline i64 mantissas copy; big mantissas are promoted to owned bytes.
        Value::Decimal { exponent, mantissa, unit } => Value::Decimal {
            exponent,
            mantissa: match mantissa {
                DecimalMantissa::I64(i) => DecimalMantissa::I64(i),
                DecimalMantissa::Big(b) => DecimalMantissa::Big(Cow::Owned(b.into_owned())),
            },
            unit,
        },
        Value::Text { value, language } => Value::Text {
            value: Cow::Owned(value.into_owned()),
            language,
        },
        Value::Bytes(b) => Value::Bytes(Cow::Owned(b.into_owned())),
        Value::Date { days, offset_min } => Value::Date { days, offset_min },
        Value::Time { time_us, offset_min } => Value::Time { time_us, offset_min },
        Value::Datetime { epoch_us, offset_min } => Value::Datetime { epoch_us, offset_min },
        Value::Schedule(s) => Value::Schedule(Cow::Owned(s.into_owned())),
        Value::Point { lon, lat, alt } => Value::Point { lon, lat, alt },
        Value::Embedding { sub_type, dims, data } => Value::Embedding {
            sub_type,
            dims,
            data: Cow::Owned(data.into_owned()),
        },
    }
}
426
427fn read_id_vec_no_duplicates(
429 reader: &mut Reader<'_>,
430 max_len: usize,
431 field: &'static str,
432) -> Result<Vec<Id>, DecodeError> {
433 let count = reader.read_varint(field)? as usize;
434 if count > max_len {
435 return Err(DecodeError::LengthExceedsLimit {
436 field,
437 len: count,
438 max: max_len,
439 });
440 }
441
442 let mut ids = Vec::with_capacity(count);
443 let mut seen = FxHashSet::with_capacity_and_hasher(count, Default::default());
444
445 for _ in 0..count {
446 let id = reader.read_id(field)?;
447 if !seen.insert(id) {
448 return Err(DecodeError::DuplicateDictionaryEntry { dict: field, id });
449 }
450 ids.push(id);
451 }
452
453 Ok(ids)
454}
455
456fn decompress_zstd(compressed: &[u8]) -> Result<Vec<u8>, DecodeError> {
457 let mut reader = Reader::new(compressed);
459 let declared_size = reader.read_varint("uncompressed_size")? as usize;
460
461 if declared_size > MAX_EDIT_SIZE {
462 return Err(DecodeError::LengthExceedsLimit {
463 field: "uncompressed_size",
464 len: declared_size,
465 max: MAX_EDIT_SIZE,
466 });
467 }
468
469 let compressed_data = reader.remaining();
470
471 let mut decoder = zstd::Decoder::new(compressed_data)
472 .map_err(|e| DecodeError::DecompressionFailed(e.to_string()))?;
473
474 let mut decompressed = Vec::with_capacity(declared_size);
475 decoder
476 .read_to_end(&mut decompressed)
477 .map_err(|e| DecodeError::DecompressionFailed(e.to_string()))?;
478
479 if decompressed.len() != declared_size {
480 return Err(DecodeError::UncompressedSizeMismatch {
481 declared: declared_size,
482 actual: decompressed.len(),
483 });
484 }
485
486 Ok(decompressed)
487}
488
/// Options controlling how an edit is serialized.
#[derive(Debug, Clone, Copy, Default)]
pub struct EncodeOptions {
    /// When `true`, produce the canonical form: dictionaries, values, and
    /// authors are sorted (duplicates rejected) so semantically equal edits
    /// encode to identical bytes. Defaults to `false` (fast path).
    pub canonical: bool,
}
511
512impl EncodeOptions {
513 pub fn new() -> Self {
515 Self::default()
516 }
517
518 pub fn canonical() -> Self {
520 Self { canonical: true }
521 }
522}
523
524pub fn encode_edit(edit: &Edit) -> Result<Vec<u8>, EncodeError> {
529 encode_edit_with_options(edit, EncodeOptions::default())
530}
531
532pub fn encode_edit_with_options(edit: &Edit, options: EncodeOptions) -> Result<Vec<u8>, EncodeError> {
534 if options.canonical {
535 encode_edit_canonical(edit)
536 } else {
537 encode_edit_fast(edit)
538 }
539}
540
541fn encode_edit_fast(edit: &Edit) -> Result<Vec<u8>, EncodeError> {
543 let property_types = rustc_hash::FxHashMap::default();
545
546 let mut dict_builder = DictionaryBuilder::with_capacity(edit.ops.len());
548
549 let mut ops_writer = Writer::with_capacity(edit.ops.len() * 50);
551
552 for op in &edit.ops {
553 encode_op(&mut ops_writer, op, &mut dict_builder, &property_types)?;
554 }
555
556 let ops_bytes = ops_writer.into_bytes();
558 let mut writer = Writer::with_capacity(256 + ops_bytes.len());
559
560 writer.write_bytes(MAGIC_UNCOMPRESSED);
562 writer.write_byte(FORMAT_VERSION);
563
564 writer.write_id(&edit.id);
566 writer.write_string(&edit.name);
567 writer.write_id_vec(&edit.authors);
568 writer.write_signed_varint(edit.created_at);
569
570 dict_builder.write_dictionaries(&mut writer);
572
573 dict_builder.write_contexts(&mut writer);
575
576 writer.write_varint(edit.ops.len() as u64);
578 writer.write_bytes(&ops_bytes);
579
580 Ok(writer.into_bytes())
581}
582
/// Two-pass canonical encoder.
///
/// Pass 1 serializes the ops into a throwaway buffer purely to discover
/// every dictionary entry; the builder is then sorted so dictionary order
/// is deterministic. Pass 2 re-encodes the ops against the sorted indices,
/// with per-op values/unsets sorted and duplicates rejected. Authors are
/// also sorted and must be unique.
fn encode_edit_canonical(edit: &Edit) -> Result<Vec<u8>, EncodeError> {
    let property_types = rustc_hash::FxHashMap::default();

    let mut dict_builder = DictionaryBuilder::with_capacity(edit.ops.len());

    // Pass 1: populate the dictionaries; the serialized bytes are discarded.
    let mut temp_writer = Writer::with_capacity(edit.ops.len() * 50);
    for op in &edit.ops {
        encode_op(&mut temp_writer, op, &mut dict_builder, &property_types)?;
    }

    // Freeze a deterministically ordered dictionary set.
    let sorted_builder = dict_builder.into_sorted();

    // Canonical author list: sorted, with duplicates adjacent and rejected.
    let mut sorted_authors = edit.authors.clone();
    sorted_authors.sort();
    for i in 1..sorted_authors.len() {
        if sorted_authors[i] == sorted_authors[i - 1] {
            return Err(EncodeError::DuplicateAuthor { id: sorted_authors[i] });
        }
    }

    // Pass 2: encode ops against the sorted indices.
    // NOTE(review): `canonical_builder` can receive additional entries during
    // this pass, yet the dictionaries written below come from `sorted_builder`.
    // This presumably relies on pass 1 having registered everything — confirm
    // no encode path adds entries only in the canonical pass.
    let mut ops_writer = Writer::with_capacity(edit.ops.len() * 50);
    let mut canonical_builder = sorted_builder.clone();
    for op in &edit.ops {
        encode_op_canonical(&mut ops_writer, op, &mut canonical_builder, &property_types)?;
    }

    let ops_bytes = ops_writer.into_bytes();
    let mut writer = Writer::with_capacity(256 + ops_bytes.len());

    writer.write_bytes(MAGIC_UNCOMPRESSED);
    writer.write_byte(FORMAT_VERSION);

    writer.write_id(&edit.id);
    writer.write_string(&edit.name);
    writer.write_id_vec(&sorted_authors);
    writer.write_signed_varint(edit.created_at);

    sorted_builder.write_dictionaries(&mut writer);

    sorted_builder.write_contexts(&mut writer);

    writer.write_varint(edit.ops.len() as u64);
    writer.write_bytes(&ops_bytes);

    Ok(writer.into_bytes())
}
652
/// Canonical serialization for the op kinds whose wire form depends on
/// iteration order (`CreateEntity` / `UpdateEntity`); every other op defers
/// to the regular `encode_op`, whose output is already order-independent.
fn encode_op_canonical(
    writer: &mut Writer,
    op: &Op<'_>,
    dict_builder: &mut DictionaryBuilder,
    property_types: &FxHashMap<Id, DataType>,
) -> Result<(), EncodeError> {
    match op {
        Op::CreateEntity(ce) => {
            // Values sorted by (property index, language index); duplicates rejected.
            let sorted_values = sort_and_check_values(&ce.values, dict_builder)?;

            writer.write_byte(1); // op tag — must match decode_op's CreateEntity tag
            writer.write_id(&ce.id);
            writer.write_varint(sorted_values.len() as u64);

            for pv in &sorted_values {
                // Prefer a declared property type; fall back to the value's own type.
                let data_type = property_types.get(&pv.property)
                    .copied()
                    .unwrap_or_else(|| pv.value.data_type());
                encode_property_value_canonical(writer, pv, dict_builder, data_type)?;
            }
            // 0xFFFFFFFF is the "no context" sentinel.
            let context_ref = match &ce.context {
                Some(ctx) => dict_builder.add_context(ctx) as u32,
                None => 0xFFFFFFFF,
            };
            writer.write_varint(context_ref as u64);
            Ok(())
        }
        Op::UpdateEntity(ue) => {
            // Both sections are canonically sorted with duplicates rejected.
            let sorted_set = sort_and_check_values(&ue.set_properties, dict_builder)?;
            let sorted_unset = sort_and_check_unsets(&ue.unset_values, dict_builder)?;

            writer.write_byte(2); // op tag — must match decode_op's UpdateEntity tag
            let id_index = dict_builder.add_object(ue.id);
            writer.write_varint(id_index as u64);

            // Presence flags: bit 0 = set-properties section, bit 1 = unset section.
            let mut flags = 0u8;
            if !sorted_set.is_empty() {
                flags |= 0x01;
            }
            if !sorted_unset.is_empty() {
                flags |= 0x02;
            }
            writer.write_byte(flags);

            if !sorted_set.is_empty() {
                writer.write_varint(sorted_set.len() as u64);
                for pv in &sorted_set {
                    let data_type = property_types.get(&pv.property)
                        .copied()
                        .unwrap_or_else(|| pv.value.data_type());
                    encode_property_value_canonical(writer, pv, dict_builder, data_type)?;
                }
            }

            if !sorted_unset.is_empty() {
                use crate::model::UnsetLanguage;
                writer.write_varint(sorted_unset.len() as u64);
                for unset in &sorted_unset {
                    // NOTE(review): DataType::Bool looks like a placeholder type for
                    // properties that only appear in unsets — confirm the decoder
                    // ignores the data type on this path.
                    let prop_idx = dict_builder.add_property(unset.property, DataType::Bool);
                    writer.write_varint(prop_idx as u64);
                    // Language sentinel: 0xFFFFFFFF = all languages, 0 = English,
                    // otherwise the language-dictionary index.
                    let lang_value: u32 = match &unset.language {
                        UnsetLanguage::All => 0xFFFFFFFF,
                        UnsetLanguage::English => 0,
                        UnsetLanguage::Specific(lang_id) => {
                            dict_builder.add_language(Some(*lang_id)) as u32
                        }
                    };
                    writer.write_varint(lang_value as u64);
                }
            }
            // 0xFFFFFFFF is the "no context" sentinel.
            let context_ref = match &ue.context {
                Some(ctx) => dict_builder.add_context(ctx) as u32,
                None => 0xFFFFFFFF,
            };
            writer.write_varint(context_ref as u64);
            Ok(())
        }
        // Remaining ops have no order-dependent payload; the regular encoding
        // is already canonical for them.
        _ => encode_op(writer, op, dict_builder, property_types),
    }
}
739
740fn sort_and_check_values<'a>(
742 values: &[crate::model::PropertyValue<'a>],
743 dict_builder: &DictionaryBuilder,
744) -> Result<Vec<crate::model::PropertyValue<'a>>, EncodeError> {
745 use crate::model::{PropertyValue, Value};
746
747 if values.is_empty() {
748 return Ok(Vec::new());
749 }
750
751 let mut indexed: Vec<(usize, usize, usize, &PropertyValue<'a>)> = values
753 .iter()
754 .enumerate()
755 .map(|(i, pv)| {
756 let prop_idx = dict_builder.get_property_index(&pv.property).unwrap_or(0);
757 let lang_idx = match &pv.value {
758 Value::Text { language, .. } => dict_builder.get_language_index(language.as_ref()).unwrap_or(0),
759 _ => 0,
760 };
761 (prop_idx, lang_idx, i, pv)
762 })
763 .collect();
764
765 indexed.sort_by(|a, b| (a.0, a.1).cmp(&(b.0, b.1)));
767
768 for i in 1..indexed.len() {
770 if indexed[i].0 == indexed[i - 1].0 && indexed[i].1 == indexed[i - 1].1 {
771 let pv = indexed[i].3;
772 let language = match &pv.value {
773 Value::Text { language, .. } => *language,
774 _ => None,
775 };
776 return Err(EncodeError::DuplicateValue {
777 property: pv.property,
778 language,
779 });
780 }
781 }
782
783 Ok(indexed.into_iter().map(|(_, _, _, pv)| pv.clone()).collect())
785}
786
787fn sort_and_check_unsets(
789 unsets: &[crate::model::UnsetValue],
790 dict_builder: &DictionaryBuilder,
791) -> Result<Vec<crate::model::UnsetValue>, EncodeError> {
792 use crate::model::UnsetLanguage;
793
794 if unsets.is_empty() {
795 return Ok(Vec::new());
796 }
797
798 let mut indexed: Vec<(usize, u32, usize, &crate::model::UnsetValue)> = unsets
800 .iter()
801 .enumerate()
802 .map(|(i, up)| {
803 let prop_idx = dict_builder.get_property_index(&up.property).unwrap_or(0);
804 let lang_key: u32 = match &up.language {
805 UnsetLanguage::All => 0xFFFFFFFF,
806 UnsetLanguage::English => 0,
807 UnsetLanguage::Specific(lang_id) => {
808 dict_builder.get_language_index(Some(lang_id)).unwrap_or(0) as u32
809 }
810 };
811 (prop_idx, lang_key, i, up)
812 })
813 .collect();
814
815 indexed.sort_by(|a, b| (a.0, a.1).cmp(&(b.0, b.1)));
817
818 for i in 1..indexed.len() {
820 if indexed[i].0 == indexed[i - 1].0 && indexed[i].1 == indexed[i - 1].1 {
821 let up = indexed[i].3;
822 let language = match &up.language {
823 UnsetLanguage::All => None,
824 UnsetLanguage::English => None,
825 UnsetLanguage::Specific(id) => Some(*id),
826 };
827 return Err(EncodeError::DuplicateUnset {
828 property: up.property,
829 language,
830 });
831 }
832 }
833
834 Ok(indexed.into_iter().map(|(_, _, _, up)| up.clone()).collect())
835}
836
837fn encode_property_value_canonical(
839 writer: &mut Writer,
840 pv: &crate::model::PropertyValue<'_>,
841 dict_builder: &mut DictionaryBuilder,
842 data_type: DataType,
843) -> Result<(), EncodeError> {
844 let prop_index = dict_builder.add_property(pv.property, data_type);
845 writer.write_varint(prop_index as u64);
846 crate::codec::value::encode_value(writer, &pv.value, dict_builder)?;
847 Ok(())
848}
849
850pub fn encode_edit_profiled(edit: &Edit, profile: bool) -> Result<Vec<u8>, EncodeError> {
852 if !profile {
853 return encode_edit(edit);
854 }
855
856 use std::time::Instant;
857
858 let t0 = Instant::now();
859
860 let property_types = rustc_hash::FxHashMap::default();
862 let t1 = Instant::now();
863
864 let mut dict_builder = DictionaryBuilder::with_capacity(edit.ops.len());
866
867 let mut ops_writer = Writer::with_capacity(edit.ops.len() * 50);
869
870 for op in &edit.ops {
871 encode_op(&mut ops_writer, op, &mut dict_builder, &property_types)?;
872 }
873 let t2 = Instant::now();
874
875 let ops_bytes = ops_writer.into_bytes();
877 let mut writer = Writer::with_capacity(256 + ops_bytes.len());
878
879 writer.write_bytes(MAGIC_UNCOMPRESSED);
880 writer.write_byte(FORMAT_VERSION);
881 writer.write_id(&edit.id);
882 writer.write_string(&edit.name);
883 writer.write_id_vec(&edit.authors);
884 writer.write_signed_varint(edit.created_at);
885 dict_builder.write_dictionaries(&mut writer);
886 dict_builder.write_contexts(&mut writer);
887 writer.write_varint(edit.ops.len() as u64);
888 writer.write_bytes(&ops_bytes);
889 let t3 = Instant::now();
890
891 let result = writer.into_bytes();
892
893 let total = t3.duration_since(t0);
894 eprintln!("=== Encode Profile (single-pass) ===");
895 eprintln!(" setup: {:?} ({:.1}%)", t1.duration_since(t0), 100.0 * t1.duration_since(t0).as_secs_f64() / total.as_secs_f64());
896 eprintln!(" encode_ops + build_dicts: {:?} ({:.1}%)", t2.duration_since(t1), 100.0 * t2.duration_since(t1).as_secs_f64() / total.as_secs_f64());
897 eprintln!(" assemble output: {:?} ({:.1}%)", t3.duration_since(t2), 100.0 * t3.duration_since(t2).as_secs_f64() / total.as_secs_f64());
898 eprintln!(" TOTAL: {:?}", total);
899
900 Ok(result)
901}
902
903pub fn encode_edit_compressed(edit: &Edit, level: i32) -> Result<Vec<u8>, EncodeError> {
905 encode_edit_compressed_with_options(edit, level, EncodeOptions::default())
906}
907
908pub fn encode_edit_compressed_with_options(
910 edit: &Edit,
911 level: i32,
912 options: EncodeOptions,
913) -> Result<Vec<u8>, EncodeError> {
914 let uncompressed = encode_edit_with_options(edit, options)?;
915
916 let compressed = zstd::encode_all(uncompressed.as_slice(), level)
917 .map_err(|e| EncodeError::CompressionFailed(e.to_string()))?;
918
919 let mut writer = Writer::with_capacity(5 + 10 + compressed.len());
920 writer.write_bytes(MAGIC_COMPRESSED);
921 writer.write_varint(uncompressed.len() as u64);
922 writer.write_bytes(&compressed);
923
924 Ok(writer.into_bytes())
925}
926
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::{CreateEntity, PropertyValue, Value};

    /// Shared fixture: a one-op edit containing a single text value.
    fn make_test_edit() -> Edit<'static> {
        Edit {
            id: [1u8; 16],
            name: Cow::Owned("Test Edit".to_string()),
            authors: vec![[2u8; 16]],
            created_at: 1234567890,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [3u8; 16],
                    values: vec![PropertyValue {
                        property: [10u8; 16],
                        value: Value::Text {
                            value: Cow::Owned("Hello".to_string()),
                            language: None,
                        },
                    }],
                    context: None,
                }),
            ],
        }
    }

    // Uncompressed encode → decode preserves header fields and op count.
    #[test]
    fn test_edit_roundtrip() {
        let edit = make_test_edit();

        let encoded = encode_edit(&edit).unwrap();
        let decoded = decode_edit(&encoded).unwrap();

        assert_eq!(edit.id, decoded.id);
        assert_eq!(edit.name, decoded.name);
        assert_eq!(edit.authors, decoded.authors);
        assert_eq!(edit.created_at, decoded.created_at);
        assert_eq!(edit.ops.len(), decoded.ops.len());
    }

    // Compressed framing roundtrips through decode_edit's compressed path.
    #[test]
    fn test_edit_compressed_roundtrip() {
        let edit = make_test_edit();

        let encoded = encode_edit_compressed(&edit, 3).unwrap();
        let decoded = decode_edit(&encoded).unwrap();

        assert_eq!(edit.id, decoded.id);
        assert_eq!(edit.name, decoded.name);
        assert_eq!(edit.authors, decoded.authors);
        assert_eq!(edit.created_at, decoded.created_at);
        assert_eq!(edit.ops.len(), decoded.ops.len());
    }

    // The two framings carry their distinct magic prefixes.
    #[test]
    fn test_compression_magic() {
        let edit = make_test_edit();

        let uncompressed = encode_edit(&edit).unwrap();
        let compressed = encode_edit_compressed(&edit, 3).unwrap();

        assert_eq!(&uncompressed[0..4], b"GRC2");
        assert_eq!(&compressed[0..5], b"GRC2Z");
    }

    // An unknown 4-byte prefix is rejected as InvalidMagic.
    #[test]
    fn test_invalid_magic() {
        let data = b"XXXX";
        let result = decode_edit(data);
        assert!(matches!(result, Err(DecodeError::InvalidMagic { .. })));
    }

    // A version byte outside [MIN_FORMAT_VERSION, FORMAT_VERSION] is rejected.
    #[test]
    fn test_unsupported_version() {
        let mut data = Vec::new();
        data.extend_from_slice(MAGIC_UNCOMPRESSED);
        // 99 is well above FORMAT_VERSION; padding keeps the reader from EOF-ing first.
        data.push(99);
        data.extend_from_slice(&[0u8; 100]);

        let result = decode_edit(&data);
        assert!(matches!(result, Err(DecodeError::UnsupportedVersion { version: 99 })));
    }

    // Degenerate edit (no name/authors/ops) still roundtrips.
    #[test]
    fn test_empty_edit() {
        let edit: Edit<'static> = Edit {
            id: [0u8; 16],
            name: Cow::Borrowed(""),
            authors: vec![],
            created_at: 0,
            ops: vec![],
        };

        let encoded = encode_edit(&edit).unwrap();
        let decoded = decode_edit(&encoded).unwrap();

        assert_eq!(edit.id, decoded.id);
        assert!(decoded.name.is_empty());
        assert!(decoded.authors.is_empty());
        assert!(decoded.ops.is_empty());
    }

    // Two edits that differ only in value order must canonically encode alike.
    #[test]
    fn test_canonical_encoding_deterministic() {
        let prop_a = [0x0A; 16];
        let prop_b = [0x0B; 16];
        let edit1: Edit<'static> = Edit {
            id: [1u8; 16],
            name: Cow::Owned("Test".to_string()),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [3u8; 16],
                    values: vec![
                        PropertyValue {
                            property: prop_a,
                            value: Value::Text {
                                value: Cow::Owned("Hello".to_string()),
                                language: None,
                            },
                        },
                        PropertyValue {
                            property: prop_b,
                            value: Value::Int64 { value: 42, unit: None },
                        },
                    ],
                    context: None,
                }),
            ],
        };

        // Same content as edit1, values listed in the opposite order.
        let edit2: Edit<'static> = Edit {
            id: [1u8; 16],
            name: Cow::Owned("Test".to_string()),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [3u8; 16],
                    values: vec![
                        PropertyValue {
                            property: prop_b,
                            value: Value::Int64 { value: 42, unit: None },
                        },
                        PropertyValue {
                            property: prop_a,
                            value: Value::Text {
                                value: Cow::Owned("Hello".to_string()),
                                language: None,
                            },
                        },
                    ],
                    context: None,
                }),
            ],
        };

        let fast1 = encode_edit_with_options(&edit1, EncodeOptions::new()).unwrap();
        let fast2 = encode_edit_with_options(&edit2, EncodeOptions::new()).unwrap();
        let canonical1 = encode_edit_with_options(&edit1, EncodeOptions::canonical()).unwrap();
        let canonical2 = encode_edit_with_options(&edit2, EncodeOptions::canonical()).unwrap();

        // Both canonical frames must still decode.
        let decoded1 = decode_edit(&canonical1).unwrap();
        let decoded2 = decode_edit(&canonical2).unwrap();
        assert_eq!(decoded1.id, edit1.id);
        assert_eq!(decoded2.id, edit2.id);

        // NOTE(review): only the first 50 bytes are compared — since canonical
        // encoding sorts values, asserting full `canonical1 == canonical2`
        // equality would be a stronger check; confirm and tighten.
        assert_eq!(
            &canonical1[..50], &canonical2[..50],
            "Canonical encoding should produce identical dictionary bytes"
        );

        // Fast encodings are intentionally not compared (order-dependent).
        let _ = fast1;
        let _ = fast2;
    }

    // Canonical mode still produces a decodable frame.
    #[test]
    fn test_canonical_encoding_roundtrip() {
        let edit = make_test_edit();

        let encoded = encode_edit_with_options(&edit, EncodeOptions::canonical()).unwrap();
        let decoded = decode_edit(&encoded).unwrap();

        assert_eq!(edit.id, decoded.id);
        assert_eq!(edit.name, decoded.name);
        assert_eq!(edit.authors, decoded.authors);
        assert_eq!(edit.created_at, decoded.created_at);
        assert_eq!(edit.ops.len(), decoded.ops.len());
    }

    // Canonical + compression compose correctly.
    #[test]
    fn test_canonical_encoding_compressed() {
        let edit = make_test_edit();

        let encoded = encode_edit_compressed_with_options(&edit, 3, EncodeOptions::canonical()).unwrap();
        let decoded = decode_edit(&encoded).unwrap();

        assert_eq!(edit.id, decoded.id);
        assert_eq!(edit.name, decoded.name);
    }

    // Duplicate authors pass in fast mode but are rejected canonically.
    #[test]
    fn test_canonical_rejects_duplicate_authors() {
        let author1 = [1u8; 16];

        let edit: Edit<'static> = Edit {
            id: [0u8; 16],
            name: Cow::Owned("Test".to_string()),
            authors: vec![author1, author1],
            created_at: 0,
            ops: vec![],
        };

        // Fast path performs no duplicate checking.
        let result = encode_edit_with_options(&edit, EncodeOptions::new());
        assert!(result.is_ok());

        let result = encode_edit_with_options(&edit, EncodeOptions::canonical());
        assert!(matches!(result, Err(EncodeError::DuplicateAuthor { .. })));
    }

    // Same (property, language) pair twice on one entity is a canonical error.
    #[test]
    fn test_canonical_rejects_duplicate_values() {
        let prop = [10u8; 16];

        let edit: Edit<'static> = Edit {
            id: [0u8; 16],
            name: Cow::Owned("Test".to_string()),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [1u8; 16],
                    values: vec![
                        PropertyValue {
                            property: prop,
                            value: Value::Text {
                                value: Cow::Owned("First".to_string()),
                                language: None,
                            },
                        },
                        PropertyValue {
                            property: prop,
                            value: Value::Text {
                                value: Cow::Owned("Second".to_string()),
                                language: None,
                            },
                        },
                    ],
                    context: None,
                }),
            ],
        };

        let result = encode_edit_with_options(&edit, EncodeOptions::canonical());
        assert!(matches!(result, Err(EncodeError::DuplicateValue { .. })));
    }

    // Same property with two different languages is allowed canonically.
    #[test]
    fn test_canonical_allows_different_languages() {
        let prop = [10u8; 16];
        let lang_en = [20u8; 16];
        let lang_es = [21u8; 16];

        let edit: Edit<'static> = Edit {
            id: [0u8; 16],
            name: Cow::Owned("Test".to_string()),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [1u8; 16],
                    values: vec![
                        PropertyValue {
                            property: prop,
                            value: Value::Text {
                                value: Cow::Owned("Hello".to_string()),
                                language: Some(lang_en),
                            },
                        },
                        PropertyValue {
                            property: prop,
                            value: Value::Text {
                                value: Cow::Owned("Hola".to_string()),
                                language: Some(lang_es),
                            },
                        },
                    ],
                    context: None,
                }),
            ],
        };

        let result = encode_edit_with_options(&edit, EncodeOptions::canonical());
        assert!(result.is_ok());
    }

    // Canonical output is byte-for-byte stable across repeated encodings.
    #[test]
    fn test_canonical_sorts_values_deterministically() {
        let prop_a = [0x0A; 16];
        let prop_b = [0x0B; 16];

        let edit: Edit<'static> = Edit {
            id: [1u8; 16],
            name: Cow::Owned("Test".to_string()),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [3u8; 16],
                    values: vec![
                        PropertyValue {
                            property: prop_b,
                            value: Value::Int64 { value: 42, unit: None },
                        },
                        PropertyValue {
                            property: prop_a,
                            value: Value::Text {
                                value: Cow::Owned("Hello".to_string()),
                                language: None,
                            },
                        },
                    ],
                    context: None,
                }),
            ],
        };

        let encoded1 = encode_edit_with_options(&edit, EncodeOptions::canonical()).unwrap();
        let encoded2 = encode_edit_with_options(&edit, EncodeOptions::canonical()).unwrap();
        assert_eq!(encoded1, encoded2, "Canonical encoding should be deterministic");

        let decoded = decode_edit(&encoded1).unwrap();
        assert_eq!(decoded.ops.len(), 1);
    }
}