1use std::borrow::Cow;
6use std::io::Read;
7
8use rustc_hash::{FxHashMap, FxHashSet};
9
10use crate::codec::op::{decode_op, encode_op};
11use crate::codec::primitives::{Reader, Writer};
12use crate::error::{DecodeError, EncodeError};
13use crate::limits::{
14 FORMAT_VERSION, MAGIC_COMPRESSED, MAGIC_UNCOMPRESSED, MAX_AUTHORS, MAX_DICT_SIZE,
15 MAX_EDIT_SIZE, MAX_OPS_PER_EDIT, MAX_STRING_LEN, MIN_FORMAT_VERSION,
16};
17use crate::model::{
18 Context, ContextEdge, DataType, DictionaryBuilder, Edit, Id, Op, UnsetLanguage,
19 UnsetRelationField, WireDictionaries,
20};
21
22pub fn decompress(input: &[u8]) -> Result<Vec<u8>, DecodeError> {
36 if input.len() < 5 {
37 return Err(DecodeError::UnexpectedEof { context: "magic" });
38 }
39 if &input[0..5] != MAGIC_COMPRESSED {
40 let mut found = [0u8; 4];
41 found.copy_from_slice(&input[0..4]);
42 return Err(DecodeError::InvalidMagic { found });
43 }
44 decompress_zstd(&input[5..])
45}
46
47pub fn decode_edit(input: &[u8]) -> Result<Edit<'_>, DecodeError> {
64 if input.len() < 4 {
65 return Err(DecodeError::UnexpectedEof { context: "magic" });
66 }
67
68 if input.len() >= 5 && &input[0..5] == MAGIC_COMPRESSED {
70 let decompressed = decompress_zstd(&input[5..])?;
73 if decompressed.len() > MAX_EDIT_SIZE {
74 return Err(DecodeError::LengthExceedsLimit {
75 field: "edit",
76 len: decompressed.len(),
77 max: MAX_EDIT_SIZE,
78 });
79 }
80 decode_edit_owned(&decompressed)
81 } else if &input[0..4] == MAGIC_UNCOMPRESSED {
82 if input.len() > MAX_EDIT_SIZE {
84 return Err(DecodeError::LengthExceedsLimit {
85 field: "edit",
86 len: input.len(),
87 max: MAX_EDIT_SIZE,
88 });
89 }
90 decode_edit_borrowed(input)
91 } else {
92 let mut found = [0u8; 4];
93 found.copy_from_slice(&input[0..4]);
94 Err(DecodeError::InvalidMagic { found })
95 }
96}
97
/// Decodes an uncompressed edit in place, borrowing string data from `input`.
///
/// Wire layout: 4-byte magic, version byte, edit id, name, authors,
/// created_at, the property dictionary (id + data-type pairs), five further
/// id dictionaries, the context table, then the op stream. Every
/// variable-length section is bounds-checked before allocation.
fn decode_edit_borrowed(input: &[u8]) -> Result<Edit<'_>, DecodeError> {
    let mut reader = Reader::new(input);

    // The caller already validated the magic; just advance past it.
    reader.read_bytes(4, "magic")?;

    let version = reader.read_byte("version")?;
    if version < MIN_FORMAT_VERSION || version > FORMAT_VERSION {
        return Err(DecodeError::UnsupportedVersion { version });
    }

    // Edit header fields; `name` borrows from the input buffer.
    let edit_id = reader.read_id("edit_id")?;
    let name = Cow::Borrowed(reader.read_str(MAX_STRING_LEN, "name")?);
    let authors = reader.read_id_vec(MAX_AUTHORS, "authors")?;
    let created_at = reader.read_signed_varint("created_at")?;

    // Property dictionary: (id, data type) pairs, duplicates rejected.
    let property_count = reader.read_varint("property_count")? as usize;
    if property_count > MAX_DICT_SIZE {
        return Err(DecodeError::LengthExceedsLimit {
            field: "properties",
            len: property_count,
            max: MAX_DICT_SIZE,
        });
    }
    let mut properties = Vec::with_capacity(property_count);
    let mut seen_props = FxHashSet::with_capacity_and_hasher(property_count, Default::default());
    for _ in 0..property_count {
        let id = reader.read_id("property_id")?;
        if !seen_props.insert(id) {
            return Err(DecodeError::DuplicateDictionaryEntry { dict: "properties", id });
        }
        let dt_byte = reader.read_byte("data_type")?;
        let data_type = DataType::from_u8(dt_byte)
            .ok_or(DecodeError::InvalidDataType { data_type: dt_byte })?;
        properties.push((id, data_type));
    }

    // Remaining id dictionaries, each bounded and duplicate-free.
    let relation_types = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "relation_types")?;
    let languages = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "languages")?;
    let units = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "units")?;
    let objects = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "objects")?;
    let context_ids = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "context_ids")?;

    let mut dicts = WireDictionaries {
        properties,
        relation_types,
        languages,
        units,
        objects,
        context_ids,
        contexts: Vec::new(),
    };

    // Context table: each context references the dictionaries decoded above.
    let context_count = reader.read_varint("context_count")? as usize;
    if context_count > MAX_DICT_SIZE {
        return Err(DecodeError::LengthExceedsLimit {
            field: "contexts",
            len: context_count,
            max: MAX_DICT_SIZE,
        });
    }
    for _ in 0..context_count {
        dicts.contexts.push(decode_context(&mut reader, &dicts)?);
    }

    // Op stream, bounded by MAX_OPS_PER_EDIT.
    let op_count = reader.read_varint("op_count")? as usize;
    if op_count > MAX_OPS_PER_EDIT {
        return Err(DecodeError::LengthExceedsLimit {
            field: "ops",
            len: op_count,
            max: MAX_OPS_PER_EDIT,
        });
    }

    let mut ops = Vec::with_capacity(op_count);
    for _ in 0..op_count {
        ops.push(decode_op(&mut reader, &dicts)?);
    }

    Ok(Edit {
        id: edit_id,
        name,
        authors,
        created_at,
        ops,
    })
}
191
/// Decodes an edit from a temporary (decompressed) buffer into fully owned
/// data, so the returned `Edit<'static>` can outlive `data`.
///
/// Mirrors [`decode_edit_borrowed`] exactly, except strings are copied
/// (`read_string`) and ops pass through `decode_op_owned`.
fn decode_edit_owned(data: &[u8]) -> Result<Edit<'static>, DecodeError> {
    let mut reader = Reader::new(data);

    // Magic already validated by the caller; skip it.
    reader.read_bytes(4, "magic")?;

    let version = reader.read_byte("version")?;
    if version < MIN_FORMAT_VERSION || version > FORMAT_VERSION {
        return Err(DecodeError::UnsupportedVersion { version });
    }

    // Edit header fields; `name` is copied out of the buffer.
    let edit_id = reader.read_id("edit_id")?;
    let name = Cow::Owned(reader.read_string(MAX_STRING_LEN, "name")?);
    let authors = reader.read_id_vec(MAX_AUTHORS, "authors")?;
    let created_at = reader.read_signed_varint("created_at")?;

    // Property dictionary: (id, data type) pairs, duplicates rejected.
    let property_count = reader.read_varint("property_count")? as usize;
    if property_count > MAX_DICT_SIZE {
        return Err(DecodeError::LengthExceedsLimit {
            field: "properties",
            len: property_count,
            max: MAX_DICT_SIZE,
        });
    }
    let mut properties = Vec::with_capacity(property_count);
    let mut seen_props = FxHashSet::with_capacity_and_hasher(property_count, Default::default());
    for _ in 0..property_count {
        let id = reader.read_id("property_id")?;
        if !seen_props.insert(id) {
            return Err(DecodeError::DuplicateDictionaryEntry { dict: "properties", id });
        }
        let dt_byte = reader.read_byte("data_type")?;
        let data_type = DataType::from_u8(dt_byte)
            .ok_or(DecodeError::InvalidDataType { data_type: dt_byte })?;
        properties.push((id, data_type));
    }

    // Remaining id dictionaries, each bounded and duplicate-free.
    let relation_types = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "relation_types")?;
    let languages = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "languages")?;
    let units = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "units")?;
    let objects = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "objects")?;
    let context_ids = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "context_ids")?;

    let mut dicts = WireDictionaries {
        properties,
        relation_types,
        languages,
        units,
        objects,
        context_ids,
        contexts: Vec::new(),
    };

    // Context table: each context references the dictionaries decoded above.
    let context_count = reader.read_varint("context_count")? as usize;
    if context_count > MAX_DICT_SIZE {
        return Err(DecodeError::LengthExceedsLimit {
            field: "contexts",
            len: context_count,
            max: MAX_DICT_SIZE,
        });
    }
    for _ in 0..context_count {
        dicts.contexts.push(decode_context(&mut reader, &dicts)?);
    }

    // Op stream, bounded by MAX_OPS_PER_EDIT; ops are converted to owned.
    let op_count = reader.read_varint("op_count")? as usize;
    if op_count > MAX_OPS_PER_EDIT {
        return Err(DecodeError::LengthExceedsLimit {
            field: "ops",
            len: op_count,
            max: MAX_OPS_PER_EDIT,
        });
    }

    let mut ops = Vec::with_capacity(op_count);
    for _ in 0..op_count {
        ops.push(decode_op_owned(&mut reader, &dicts)?);
    }

    Ok(Edit {
        id: edit_id,
        name,
        authors,
        created_at,
        ops,
    })
}
285
286fn decode_op_owned(reader: &mut Reader<'_>, dicts: &WireDictionaries) -> Result<Op<'static>, DecodeError> {
288 let op = decode_op(reader, dicts)?;
290 Ok(op_to_owned(op))
291}
292
293fn decode_context(reader: &mut Reader<'_>, dicts: &WireDictionaries) -> Result<Context, DecodeError> {
295 let root_id_index = reader.read_varint("root_id")? as usize;
296 if root_id_index >= dicts.context_ids.len() {
297 return Err(DecodeError::IndexOutOfBounds {
298 dict: "context_ids",
299 index: root_id_index,
300 size: dicts.context_ids.len(),
301 });
302 }
303 let root_id = dicts.context_ids[root_id_index];
304
305 let edge_count = reader.read_varint("edge_count")? as usize;
306 if edge_count > MAX_DICT_SIZE {
307 return Err(DecodeError::LengthExceedsLimit {
308 field: "context_edges",
309 len: edge_count,
310 max: MAX_DICT_SIZE,
311 });
312 }
313
314 let mut edges = Vec::with_capacity(edge_count);
315 for _ in 0..edge_count {
316 let type_id_index = reader.read_varint("edge_type_id")? as usize;
317 if type_id_index >= dicts.relation_types.len() {
318 return Err(DecodeError::IndexOutOfBounds {
319 dict: "relation_types",
320 index: type_id_index,
321 size: dicts.relation_types.len(),
322 });
323 }
324 let type_id = dicts.relation_types[type_id_index];
325
326 let to_entity_id_index = reader.read_varint("edge_to_entity_id")? as usize;
327 if to_entity_id_index >= dicts.context_ids.len() {
328 return Err(DecodeError::IndexOutOfBounds {
329 dict: "context_ids",
330 index: to_entity_id_index,
331 size: dicts.context_ids.len(),
332 });
333 }
334 let to_entity_id = dicts.context_ids[to_entity_id_index];
335
336 edges.push(ContextEdge { type_id, to_entity_id });
337 }
338
339 Ok(Context { root_id, edges })
340}
341
/// Converts a borrowed `Op` into a fully owned `Op<'static>`.
///
/// Variants with no borrowed payload (delete/restore/value-ref) pass through
/// unchanged; the rest rebuild the struct, converting every `Cow` field with
/// `into_owned` and each property value via `pv_to_owned`.
fn op_to_owned(op: Op<'_>) -> Op<'static> {
    match op {
        Op::CreateEntity(ce) => Op::CreateEntity(crate::model::CreateEntity {
            id: ce.id,
            values: ce.values.into_iter().map(pv_to_owned).collect(),
            context: ce.context,
        }),
        Op::UpdateEntity(ue) => Op::UpdateEntity(crate::model::UpdateEntity {
            id: ue.id,
            set_properties: ue.set_properties.into_iter().map(pv_to_owned).collect(),
            unset_values: ue.unset_values,
            context: ue.context,
        }),
        // No borrowed data in these variants; move them through as-is.
        Op::DeleteEntity(de) => Op::DeleteEntity(de),
        Op::RestoreEntity(re) => Op::RestoreEntity(re),
        Op::CreateRelation(cr) => Op::CreateRelation(crate::model::CreateRelation {
            id: cr.id,
            relation_type: cr.relation_type,
            from: cr.from,
            from_is_value_ref: cr.from_is_value_ref,
            to: cr.to,
            to_is_value_ref: cr.to_is_value_ref,
            entity: cr.entity,
            // `position` is the only borrowed field on relations.
            position: cr.position.map(|p| Cow::Owned(p.into_owned())),
            from_space: cr.from_space,
            from_version: cr.from_version,
            to_space: cr.to_space,
            to_version: cr.to_version,
            context: cr.context,
        }),
        Op::UpdateRelation(ur) => Op::UpdateRelation(crate::model::UpdateRelation {
            id: ur.id,
            from_space: ur.from_space,
            from_version: ur.from_version,
            to_space: ur.to_space,
            to_version: ur.to_version,
            position: ur.position.map(|p| Cow::Owned(p.into_owned())),
            unset: ur.unset,
            context: ur.context,
        }),
        Op::DeleteRelation(dr) => Op::DeleteRelation(dr),
        Op::RestoreRelation(rr) => Op::RestoreRelation(rr),
        Op::CreateValueRef(cvr) => Op::CreateValueRef(cvr),
    }
}
388
389fn pv_to_owned(pv: crate::model::PropertyValue<'_>) -> crate::model::PropertyValue<'static> {
391 crate::model::PropertyValue {
392 property: pv.property,
393 value: value_to_owned(pv.value),
394 }
395}
396
/// Converts a borrowed `Value` into a fully owned `Value<'static>`.
///
/// Purely-scalar variants are rebuilt field-by-field; every `Cow`-carrying
/// variant (big decimal mantissa, text, bytes, temporal strings, embedding
/// data) is converted with `into_owned`.
fn value_to_owned(v: crate::model::Value<'_>) -> crate::model::Value<'static> {
    use crate::model::{DecimalMantissa, Value};
    match v {
        Value::Bool(b) => Value::Bool(b),
        Value::Int64 { value, unit } => Value::Int64 { value, unit },
        Value::Float64 { value, unit } => Value::Float64 { value, unit },
        Value::Decimal { exponent, mantissa, unit } => Value::Decimal {
            exponent,
            // Only the big-integer mantissa borrows; the i64 form is scalar.
            mantissa: match mantissa {
                DecimalMantissa::I64(i) => DecimalMantissa::I64(i),
                DecimalMantissa::Big(b) => DecimalMantissa::Big(Cow::Owned(b.into_owned())),
            },
            unit,
        },
        Value::Text { value, language } => Value::Text {
            value: Cow::Owned(value.into_owned()),
            language,
        },
        Value::Bytes(b) => Value::Bytes(Cow::Owned(b.into_owned())),
        Value::Date(s) => Value::Date(Cow::Owned(s.into_owned())),
        Value::Time(s) => Value::Time(Cow::Owned(s.into_owned())),
        Value::Datetime(s) => Value::Datetime(Cow::Owned(s.into_owned())),
        Value::Schedule(s) => Value::Schedule(Cow::Owned(s.into_owned())),
        Value::Point { lat, lon, alt } => Value::Point { lat, lon, alt },
        Value::Rect { min_lat, min_lon, max_lat, max_lon } => Value::Rect { min_lat, min_lon, max_lat, max_lon },
        Value::Embedding { sub_type, dims, data } => Value::Embedding {
            sub_type,
            dims,
            data: Cow::Owned(data.into_owned()),
        },
    }
}
430
431fn read_id_vec_no_duplicates(
433 reader: &mut Reader<'_>,
434 max_len: usize,
435 field: &'static str,
436) -> Result<Vec<Id>, DecodeError> {
437 let count = reader.read_varint(field)? as usize;
438 if count > max_len {
439 return Err(DecodeError::LengthExceedsLimit {
440 field,
441 len: count,
442 max: max_len,
443 });
444 }
445
446 let mut ids = Vec::with_capacity(count);
447 let mut seen = FxHashSet::with_capacity_and_hasher(count, Default::default());
448
449 for _ in 0..count {
450 let id = reader.read_id(field)?;
451 if !seen.insert(id) {
452 return Err(DecodeError::DuplicateDictionaryEntry { dict: field, id });
453 }
454 ids.push(id);
455 }
456
457 Ok(ids)
458}
459
460fn decompress_zstd(compressed: &[u8]) -> Result<Vec<u8>, DecodeError> {
461 let mut reader = Reader::new(compressed);
463 let declared_size = reader.read_varint("uncompressed_size")? as usize;
464
465 if declared_size > MAX_EDIT_SIZE {
466 return Err(DecodeError::LengthExceedsLimit {
467 field: "uncompressed_size",
468 len: declared_size,
469 max: MAX_EDIT_SIZE,
470 });
471 }
472
473 let compressed_data = reader.remaining();
474
475 let mut decoder = zstd::Decoder::new(compressed_data)
476 .map_err(|e| DecodeError::DecompressionFailed(e.to_string()))?;
477
478 let mut decompressed = Vec::with_capacity(declared_size);
479 decoder
480 .read_to_end(&mut decompressed)
481 .map_err(|e| DecodeError::DecompressionFailed(e.to_string()))?;
482
483 if decompressed.len() != declared_size {
484 return Err(DecodeError::UncompressedSizeMismatch {
485 declared: declared_size,
486 actual: decompressed.len(),
487 });
488 }
489
490 Ok(decompressed)
491}
492
/// Options controlling how an `Edit` is encoded.
#[derive(Debug, Clone, Copy, Default)]
pub struct EncodeOptions {
    // When true, produce the canonical (deterministic) encoding:
    // dictionaries sorted, authors sorted (duplicates rejected), and
    // property values/unsets written in a deterministic order.
    // Defaults to false (fast single-pass encoding).
    pub canonical: bool,
}
515
impl EncodeOptions {
    /// Default options: fast, non-canonical encoding.
    pub fn new() -> Self {
        Self::default()
    }

    /// Options selecting the canonical (deterministic) encoding.
    pub fn canonical() -> Self {
        Self { canonical: true }
    }
}
527
528fn validate_context_limits(context: &Context) -> Result<(), EncodeError> {
529 if context.edges.len() > MAX_DICT_SIZE {
530 return Err(EncodeError::LengthExceedsLimit {
531 field: "context_edges",
532 len: context.edges.len(),
533 max: MAX_DICT_SIZE,
534 });
535 }
536 Ok(())
537}
538
/// Validates an edit before encoding.
///
/// Checks size limits (name bytes, author count, op count, values per
/// entity, context edges) and cross-op invariants: consistent data types per
/// property across the whole edit, no delete-then-create of the same
/// entity/relation, no overlap between set and unset within one update, and
/// TEXT-typed properties wherever a language is referenced.
fn validate_edit_inputs(edit: &Edit) -> Result<(), EncodeError> {
    let name_len = edit.name.as_bytes().len();
    if name_len > MAX_STRING_LEN {
        return Err(EncodeError::LengthExceedsLimit {
            field: "name",
            len: name_len,
            max: MAX_STRING_LEN,
        });
    }
    if edit.authors.len() > MAX_AUTHORS {
        return Err(EncodeError::LengthExceedsLimit {
            field: "authors",
            len: edit.authors.len(),
            max: MAX_AUTHORS,
        });
    }
    if edit.ops.len() > MAX_OPS_PER_EDIT {
        return Err(EncodeError::LengthExceedsLimit {
            field: "ops",
            len: edit.ops.len(),
            max: MAX_OPS_PER_EDIT,
        });
    }

    // Rolling state across ops: the first data type observed for each
    // property becomes binding for the rest of the edit, and deletions are
    // tracked to forbid delete-then-create of the same id.
    let mut property_types: FxHashMap<Id, DataType> = FxHashMap::default();
    let mut deleted_entities: FxHashSet<Id> = FxHashSet::default();
    let mut deleted_relations: FxHashSet<Id> = FxHashSet::default();

    for op in &edit.ops {
        match op {
            Op::CreateEntity(ce) => {
                if deleted_entities.contains(&ce.id) {
                    return Err(EncodeError::InvalidInput { context: "delete-then-create entity in same edit" });
                }
                if ce.values.len() > crate::limits::MAX_VALUES_PER_ENTITY {
                    return Err(EncodeError::LengthExceedsLimit {
                        field: "values",
                        len: ce.values.len(),
                        max: crate::limits::MAX_VALUES_PER_ENTITY,
                    });
                }
                // Each property's data type must agree with earlier uses.
                for pv in &ce.values {
                    let dt = pv.value.data_type();
                    if let Some(existing) = property_types.get(&pv.property) {
                        if *existing != dt {
                            return Err(EncodeError::InvalidInput { context: "property type mismatch" });
                        }
                    } else {
                        property_types.insert(pv.property, dt);
                    }
                }
                if let Some(ctx) = &ce.context {
                    validate_context_limits(ctx)?;
                }
            }
            Op::UpdateEntity(ue) => {
                if ue.set_properties.len() > crate::limits::MAX_VALUES_PER_ENTITY {
                    return Err(EncodeError::LengthExceedsLimit {
                        field: "set_properties",
                        len: ue.set_properties.len(),
                        max: crate::limits::MAX_VALUES_PER_ENTITY,
                    });
                }
                if ue.unset_values.len() > crate::limits::MAX_VALUES_PER_ENTITY {
                    return Err(EncodeError::LengthExceedsLimit {
                        field: "unset_values",
                        len: ue.unset_values.len(),
                        max: crate::limits::MAX_VALUES_PER_ENTITY,
                    });
                }
                // Record which (property, language) pairs this op sets, so
                // the unset list can be checked for overlap. Non-text values
                // are keyed under language None.
                let mut set_langs: FxHashMap<Id, FxHashSet<Option<Id>>> = FxHashMap::default();
                for pv in &ue.set_properties {
                    let dt = pv.value.data_type();
                    if let Some(existing) = property_types.get(&pv.property) {
                        if *existing != dt {
                            return Err(EncodeError::InvalidInput { context: "property type mismatch" });
                        }
                    } else {
                        property_types.insert(pv.property, dt);
                    }
                    let lang_key = match &pv.value {
                        crate::model::Value::Text { language, .. } => *language,
                        _ => None,
                    };
                    set_langs.entry(pv.property).or_default().insert(lang_key);
                }

                for unset in &ue.unset_values {
                    match &unset.language {
                        // Unset-all conflicts with any set on the property.
                        UnsetLanguage::All => {
                            if let Some(existing) = set_langs.get(&unset.property) {
                                if !existing.is_empty() {
                                    return Err(EncodeError::InvalidInput { context: "update_entity set/unset overlap" });
                                }
                            }
                        }
                        // Unsetting the default language conflicts with a set
                        // under language None, and requires a TEXT property.
                        UnsetLanguage::English => {
                            if let Some(existing) = set_langs.get(&unset.property) {
                                if existing.contains(&None) {
                                    return Err(EncodeError::InvalidInput { context: "update_entity set/unset overlap" });
                                }
                            }
                            if let Some(existing) = property_types.get(&unset.property) {
                                if *existing != DataType::Text {
                                    return Err(EncodeError::InvalidInput { context: "unset language requires TEXT" });
                                }
                            } else {
                                property_types.insert(unset.property, DataType::Text);
                            }
                        }
                        // A specific language conflicts only with a set of
                        // that same language; also requires a TEXT property.
                        UnsetLanguage::Specific(lang_id) => {
                            if let Some(existing) = set_langs.get(&unset.property) {
                                if existing.contains(&Some(*lang_id)) {
                                    return Err(EncodeError::InvalidInput { context: "update_entity set/unset overlap" });
                                }
                            }
                            if let Some(existing) = property_types.get(&unset.property) {
                                if *existing != DataType::Text {
                                    return Err(EncodeError::InvalidInput { context: "unset language requires TEXT" });
                                }
                            } else {
                                property_types.insert(unset.property, DataType::Text);
                            }
                        }
                    }
                }
                if let Some(ctx) = &ue.context {
                    validate_context_limits(ctx)?;
                }
            }
            Op::DeleteEntity(de) => {
                deleted_entities.insert(de.id);
                if let Some(ctx) = &de.context {
                    validate_context_limits(ctx)?;
                }
            }
            Op::RestoreEntity(re) => {
                if let Some(ctx) = &re.context {
                    validate_context_limits(ctx)?;
                }
            }
            Op::CreateRelation(cr) => {
                if deleted_relations.contains(&cr.id) {
                    return Err(EncodeError::InvalidInput { context: "delete-then-create relation in same edit" });
                }
                if let Some(entity) = cr.entity {
                    if entity == cr.id {
                        return Err(EncodeError::InvalidInput { context: "relation entity must differ from id" });
                    }
                }
                if let Some(ctx) = &cr.context {
                    validate_context_limits(ctx)?;
                }
            }
            Op::UpdateRelation(ur) => {
                // Each field may be unset at most once, and a field cannot be
                // both set and unset in the same op.
                let mut seen_unset: FxHashSet<UnsetRelationField> = FxHashSet::default();
                for field in &ur.unset {
                    if !seen_unset.insert(*field) {
                        return Err(EncodeError::InvalidInput { context: "update_relation duplicate unset field" });
                    }
                }
                if ur.unset.contains(&UnsetRelationField::FromSpace) && ur.from_space.is_some() {
                    return Err(EncodeError::InvalidInput { context: "update_relation set/unset overlap" });
                }
                if ur.unset.contains(&UnsetRelationField::FromVersion) && ur.from_version.is_some() {
                    return Err(EncodeError::InvalidInput { context: "update_relation set/unset overlap" });
                }
                if ur.unset.contains(&UnsetRelationField::ToSpace) && ur.to_space.is_some() {
                    return Err(EncodeError::InvalidInput { context: "update_relation set/unset overlap" });
                }
                if ur.unset.contains(&UnsetRelationField::ToVersion) && ur.to_version.is_some() {
                    return Err(EncodeError::InvalidInput { context: "update_relation set/unset overlap" });
                }
                if ur.unset.contains(&UnsetRelationField::Position) && ur.position.is_some() {
                    return Err(EncodeError::InvalidInput { context: "update_relation set/unset overlap" });
                }
                if let Some(ctx) = &ur.context {
                    validate_context_limits(ctx)?;
                }
            }
            Op::DeleteRelation(dr) => {
                deleted_relations.insert(dr.id);
                if let Some(ctx) = &dr.context {
                    validate_context_limits(ctx)?;
                }
            }
            Op::RestoreRelation(rr) => {
                if let Some(ctx) = &rr.context {
                    validate_context_limits(ctx)?;
                }
            }
            Op::CreateValueRef(cvr) => {
                // A language on a value ref implies a TEXT-typed property.
                if cvr.language.is_some() {
                    if let Some(existing) = property_types.get(&cvr.property) {
                        if *existing != DataType::Text {
                            return Err(EncodeError::InvalidInput { context: "create_value_ref language requires TEXT" });
                        }
                    } else {
                        property_types.insert(cvr.property, DataType::Text);
                    }
                }
            }
        }
    }

    Ok(())
}
752
753pub fn encode_edit(edit: &Edit) -> Result<Vec<u8>, EncodeError> {
758 encode_edit_with_options(edit, EncodeOptions::default())
759}
760
761pub fn encode_edit_with_options(edit: &Edit, options: EncodeOptions) -> Result<Vec<u8>, EncodeError> {
763 validate_edit_inputs(edit)?;
764 if options.canonical {
765 encode_edit_canonical(edit)
766 } else {
767 encode_edit_fast(edit)
768 }
769}
770
/// Fast single-pass encoder: ops are encoded first into a side buffer while
/// the dictionaries are built incrementally, then the header, dictionaries,
/// contexts, and the op bytes are assembled in wire order.
fn encode_edit_fast(edit: &Edit) -> Result<Vec<u8>, EncodeError> {
    // No pre-declared property types; encode_op discovers them as it goes.
    let property_types = rustc_hash::FxHashMap::default();

    let mut dict_builder = DictionaryBuilder::with_capacity(edit.ops.len());

    // Rough pre-allocation: ~50 bytes per op.
    let mut ops_writer = Writer::with_capacity(edit.ops.len() * 50);

    for op in &edit.ops {
        encode_op(&mut ops_writer, op, &mut dict_builder, &property_types)?;
    }
    dict_builder.validate_limits()?;

    let ops_bytes = ops_writer.into_bytes();
    let mut writer = Writer::with_capacity(256 + ops_bytes.len());

    // Wire order: magic, version, header, dictionaries, contexts, ops.
    writer.write_bytes(MAGIC_UNCOMPRESSED);
    writer.write_byte(FORMAT_VERSION);

    writer.write_id(&edit.id);
    writer.write_string(&edit.name);
    writer.write_id_vec(&edit.authors);
    writer.write_signed_varint(edit.created_at);

    dict_builder.write_dictionaries(&mut writer);

    dict_builder.write_contexts(&mut writer);

    writer.write_varint(edit.ops.len() as u64);
    writer.write_bytes(&ops_bytes);

    Ok(writer.into_bytes())
}
813
/// Canonical (deterministic) encoder.
///
/// Two passes: the first encodes all ops into a throwaway buffer purely to
/// populate the dictionary builder, which is then sorted; the second
/// re-encodes the ops against the sorted dictionaries so all indices are
/// deterministic. Authors are sorted and duplicates rejected.
fn encode_edit_canonical(edit: &Edit) -> Result<Vec<u8>, EncodeError> {
    let property_types = rustc_hash::FxHashMap::default();

    let mut dict_builder = DictionaryBuilder::with_capacity(edit.ops.len());

    // Pass 1: discover all dictionary entries; the bytes are discarded.
    let mut temp_writer = Writer::with_capacity(edit.ops.len() * 50);
    for op in &edit.ops {
        encode_op(&mut temp_writer, op, &mut dict_builder, &property_types)?;
    }
    dict_builder.validate_limits()?;

    let sorted_builder = dict_builder.into_sorted();

    // Canonical form sorts authors; adjacent equals after sort = duplicate.
    let mut sorted_authors = edit.authors.clone();
    sorted_authors.sort();
    for i in 1..sorted_authors.len() {
        if sorted_authors[i] == sorted_authors[i - 1] {
            return Err(EncodeError::DuplicateAuthor { id: sorted_authors[i] });
        }
    }

    // Pass 2: re-encode ops against the sorted dictionaries.
    let mut ops_writer = Writer::with_capacity(edit.ops.len() * 50);
    let mut canonical_builder = sorted_builder.clone();
    for op in &edit.ops {
        encode_op_canonical(&mut ops_writer, op, &mut canonical_builder, &property_types)?;
    }

    let ops_bytes = ops_writer.into_bytes();
    let mut writer = Writer::with_capacity(256 + ops_bytes.len());

    // Wire order matches encode_edit_fast, with sorted authors and dicts.
    writer.write_bytes(MAGIC_UNCOMPRESSED);
    writer.write_byte(FORMAT_VERSION);

    writer.write_id(&edit.id);
    writer.write_string(&edit.name);
    writer.write_id_vec(&sorted_authors);
    writer.write_signed_varint(edit.created_at);

    sorted_builder.write_dictionaries(&mut writer);

    sorted_builder.write_contexts(&mut writer);

    writer.write_varint(edit.ops.len() as u64);
    writer.write_bytes(&ops_bytes);

    Ok(writer.into_bytes())
}
884
/// Encodes one op in canonical form.
///
/// Only CreateEntity and UpdateEntity need special handling — their property
/// values and unsets are sorted by (property index, language index) and
/// checked for duplicates. All other ops delegate to the regular `encode_op`.
fn encode_op_canonical(
    writer: &mut Writer,
    op: &Op<'_>,
    dict_builder: &mut DictionaryBuilder,
    property_types: &FxHashMap<Id, DataType>,
) -> Result<(), EncodeError> {
    match op {
        Op::CreateEntity(ce) => {
            let sorted_values = sort_and_check_values(&ce.values, dict_builder)?;

            // Op tag 1, then the entity id and the sorted values.
            writer.write_byte(1); writer.write_id(&ce.id);
            writer.write_varint(sorted_values.len() as u64);

            for pv in &sorted_values {
                // Prefer the pre-declared data type; fall back to the
                // value's own type.
                let data_type = property_types.get(&pv.property)
                    .copied()
                    .unwrap_or_else(|| pv.value.data_type());
                encode_property_value_canonical(writer, pv, dict_builder, data_type)?;
            }
            // 0xFFFFFFFF is the sentinel for "no context".
            let context_ref = match &ce.context {
                Some(ctx) => dict_builder.add_context(ctx) as u32,
                None => 0xFFFFFFFF,
            };
            writer.write_varint(context_ref as u64);
            Ok(())
        }
        Op::UpdateEntity(ue) => {
            let sorted_set = sort_and_check_values(&ue.set_properties, dict_builder)?;
            let sorted_unset = sort_and_check_unsets(&ue.unset_values, dict_builder)?;

            // Op tag 2, then the target entity as an object-dict index.
            writer.write_byte(2); let id_index = dict_builder.add_object(ue.id);
            writer.write_varint(id_index as u64);

            // Presence flags: bit 0 = has sets, bit 1 = has unsets.
            let mut flags = 0u8;
            if !sorted_set.is_empty() {
                flags |= 0x01; }
            if !sorted_unset.is_empty() {
                flags |= 0x02; }
            writer.write_byte(flags);

            if !sorted_set.is_empty() {
                writer.write_varint(sorted_set.len() as u64);
                for pv in &sorted_set {
                    let data_type = property_types.get(&pv.property)
                        .copied()
                        .unwrap_or_else(|| pv.value.data_type());
                    encode_property_value_canonical(writer, pv, dict_builder, data_type)?;
                }
            }

            if !sorted_unset.is_empty() {
                use crate::model::UnsetLanguage;
                writer.write_varint(sorted_unset.len() as u64);
                for unset in &sorted_unset {
                    let prop_idx = dict_builder.add_property(unset.property, DataType::Bool);
                    writer.write_varint(prop_idx as u64);
                    // Language encoding: 0xFFFFFFFF = all, 0 = default,
                    // otherwise a language-dict index.
                    let lang_value: u32 = match &unset.language {
                        UnsetLanguage::All => 0xFFFFFFFF,
                        UnsetLanguage::English => 0,
                        UnsetLanguage::Specific(lang_id) => {
                            dict_builder.add_language(Some(*lang_id)) as u32
                        }
                    };
                    writer.write_varint(lang_value as u64);
                }
            }
            // 0xFFFFFFFF is the sentinel for "no context".
            let context_ref = match &ue.context {
                Some(ctx) => dict_builder.add_context(ctx) as u32,
                None => 0xFFFFFFFF,
            };
            writer.write_varint(context_ref as u64);
            Ok(())
        }
        // All other ops have no order-dependent payload; reuse encode_op.
        _ => encode_op(writer, op, dict_builder, property_types),
    }
}
971
972fn sort_and_check_values<'a>(
974 values: &[crate::model::PropertyValue<'a>],
975 dict_builder: &DictionaryBuilder,
976) -> Result<Vec<crate::model::PropertyValue<'a>>, EncodeError> {
977 use crate::model::{PropertyValue, Value};
978
979 if values.is_empty() {
980 return Ok(Vec::new());
981 }
982
983 let mut indexed: Vec<(usize, usize, usize, &PropertyValue<'a>)> = values
985 .iter()
986 .enumerate()
987 .map(|(i, pv)| {
988 let prop_idx = dict_builder.get_property_index(&pv.property).unwrap_or(0);
989 let lang_idx = match &pv.value {
990 Value::Text { language, .. } => dict_builder.get_language_index(language.as_ref()).unwrap_or(0),
991 _ => 0,
992 };
993 (prop_idx, lang_idx, i, pv)
994 })
995 .collect();
996
997 indexed.sort_by(|a, b| (a.0, a.1).cmp(&(b.0, b.1)));
999
1000 for i in 1..indexed.len() {
1002 if indexed[i].0 == indexed[i - 1].0 && indexed[i].1 == indexed[i - 1].1 {
1003 let pv = indexed[i].3;
1004 let language = match &pv.value {
1005 Value::Text { language, .. } => *language,
1006 _ => None,
1007 };
1008 return Err(EncodeError::DuplicateValue {
1009 property: pv.property,
1010 language,
1011 });
1012 }
1013 }
1014
1015 Ok(indexed.into_iter().map(|(_, _, _, pv)| pv.clone()).collect())
1017}
1018
1019fn sort_and_check_unsets(
1021 unsets: &[crate::model::UnsetValue],
1022 dict_builder: &DictionaryBuilder,
1023) -> Result<Vec<crate::model::UnsetValue>, EncodeError> {
1024 use crate::model::UnsetLanguage;
1025
1026 if unsets.is_empty() {
1027 return Ok(Vec::new());
1028 }
1029
1030 let mut indexed: Vec<(usize, u32, usize, &crate::model::UnsetValue)> = unsets
1032 .iter()
1033 .enumerate()
1034 .map(|(i, up)| {
1035 let prop_idx = dict_builder.get_property_index(&up.property).unwrap_or(0);
1036 let lang_key: u32 = match &up.language {
1037 UnsetLanguage::All => 0xFFFFFFFF,
1038 UnsetLanguage::English => 0,
1039 UnsetLanguage::Specific(lang_id) => {
1040 dict_builder.get_language_index(Some(lang_id)).unwrap_or(0) as u32
1041 }
1042 };
1043 (prop_idx, lang_key, i, up)
1044 })
1045 .collect();
1046
1047 indexed.sort_by(|a, b| (a.0, a.1).cmp(&(b.0, b.1)));
1049
1050 for i in 1..indexed.len() {
1052 if indexed[i].0 == indexed[i - 1].0 && indexed[i].1 == indexed[i - 1].1 {
1053 let up = indexed[i].3;
1054 let language = match &up.language {
1055 UnsetLanguage::All => None,
1056 UnsetLanguage::English => None,
1057 UnsetLanguage::Specific(id) => Some(*id),
1058 };
1059 return Err(EncodeError::DuplicateUnset {
1060 property: up.property,
1061 language,
1062 });
1063 }
1064 }
1065
1066 Ok(indexed.into_iter().map(|(_, _, _, up)| up.clone()).collect())
1067}
1068
1069fn encode_property_value_canonical(
1071 writer: &mut Writer,
1072 pv: &crate::model::PropertyValue<'_>,
1073 dict_builder: &mut DictionaryBuilder,
1074 data_type: DataType,
1075) -> Result<(), EncodeError> {
1076 let prop_index = dict_builder.add_property(pv.property, data_type);
1077 writer.write_varint(prop_index as u64);
1078 crate::codec::value::encode_value(writer, &pv.value, dict_builder)?;
1079 Ok(())
1080}
1081
1082pub fn encode_edit_profiled(edit: &Edit, profile: bool) -> Result<Vec<u8>, EncodeError> {
1084 if !profile {
1085 return encode_edit(edit);
1086 }
1087
1088 use std::time::Instant;
1089
1090 let t0 = Instant::now();
1091
1092 let property_types = rustc_hash::FxHashMap::default();
1094 let t1 = Instant::now();
1095
1096 let mut dict_builder = DictionaryBuilder::with_capacity(edit.ops.len());
1098
1099 let mut ops_writer = Writer::with_capacity(edit.ops.len() * 50);
1101
1102 for op in &edit.ops {
1103 encode_op(&mut ops_writer, op, &mut dict_builder, &property_types)?;
1104 }
1105 let t2 = Instant::now();
1106
1107 let ops_bytes = ops_writer.into_bytes();
1109 let mut writer = Writer::with_capacity(256 + ops_bytes.len());
1110
1111 writer.write_bytes(MAGIC_UNCOMPRESSED);
1112 writer.write_byte(FORMAT_VERSION);
1113 writer.write_id(&edit.id);
1114 writer.write_string(&edit.name);
1115 writer.write_id_vec(&edit.authors);
1116 writer.write_signed_varint(edit.created_at);
1117 dict_builder.write_dictionaries(&mut writer);
1118 dict_builder.write_contexts(&mut writer);
1119 writer.write_varint(edit.ops.len() as u64);
1120 writer.write_bytes(&ops_bytes);
1121 let t3 = Instant::now();
1122
1123 let result = writer.into_bytes();
1124
1125 let total = t3.duration_since(t0);
1126 eprintln!("=== Encode Profile (single-pass) ===");
1127 eprintln!(" setup: {:?} ({:.1}%)", t1.duration_since(t0), 100.0 * t1.duration_since(t0).as_secs_f64() / total.as_secs_f64());
1128 eprintln!(" encode_ops + build_dicts: {:?} ({:.1}%)", t2.duration_since(t1), 100.0 * t2.duration_since(t1).as_secs_f64() / total.as_secs_f64());
1129 eprintln!(" assemble output: {:?} ({:.1}%)", t3.duration_since(t2), 100.0 * t3.duration_since(t2).as_secs_f64() / total.as_secs_f64());
1130 eprintln!(" TOTAL: {:?}", total);
1131
1132 Ok(result)
1133}
1134
1135pub fn encode_edit_compressed(edit: &Edit, level: i32) -> Result<Vec<u8>, EncodeError> {
1137 encode_edit_compressed_with_options(edit, level, EncodeOptions::default())
1138}
1139
1140pub fn encode_edit_compressed_with_options(
1142 edit: &Edit,
1143 level: i32,
1144 options: EncodeOptions,
1145) -> Result<Vec<u8>, EncodeError> {
1146 let uncompressed = encode_edit_with_options(edit, options)?;
1147
1148 let compressed = zstd::encode_all(uncompressed.as_slice(), level)
1149 .map_err(|e| EncodeError::CompressionFailed(e.to_string()))?;
1150
1151 let mut writer = Writer::with_capacity(5 + 10 + compressed.len());
1152 writer.write_bytes(MAGIC_COMPRESSED);
1153 writer.write_varint(uncompressed.len() as u64);
1154 writer.write_bytes(&compressed);
1155
1156 Ok(writer.into_bytes())
1157}
1158
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::{
        CreateEntity, CreateRelation, CreateValueRef, DeleteEntity, DeleteRelation, PropertyValue,
        UpdateEntity, UpdateRelation, UnsetLanguage, UnsetRelationField, UnsetValue, Value,
    };

    // Minimal fixture: a one-op edit (single CreateEntity carrying one text
    // property) used by the roundtrip and magic-byte tests below.
    fn make_test_edit() -> Edit<'static> {
        Edit {
            id: [1u8; 16],
            name: Cow::Owned("Test Edit".to_string()),
            authors: vec![[2u8; 16]],
            created_at: 1234567890,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [3u8; 16],
                    values: vec![PropertyValue {
                        property: [10u8; 16],
                        value: Value::Text {
                            value: Cow::Owned("Hello".to_string()),
                            language: None,
                        },
                    }],
                    context: None,
                }),
            ],
        }
    }

    // encode_edit -> decode_edit preserves all top-level edit fields.
    #[test]
    fn test_edit_roundtrip() {
        let edit = make_test_edit();

        let encoded = encode_edit(&edit).unwrap();
        let decoded = decode_edit(&encoded).unwrap();

        assert_eq!(edit.id, decoded.id);
        assert_eq!(edit.name, decoded.name);
        assert_eq!(edit.authors, decoded.authors);
        assert_eq!(edit.created_at, decoded.created_at);
        assert_eq!(edit.ops.len(), decoded.ops.len());
    }

    // Same roundtrip through the compressed framing; decode_edit detects the
    // compressed magic transparently.
    #[test]
    fn test_edit_compressed_roundtrip() {
        let edit = make_test_edit();

        let encoded = encode_edit_compressed(&edit, 3).unwrap();
        let decoded = decode_edit(&encoded).unwrap();

        assert_eq!(edit.id, decoded.id);
        assert_eq!(edit.name, decoded.name);
        assert_eq!(edit.authors, decoded.authors);
        assert_eq!(edit.created_at, decoded.created_at);
        assert_eq!(edit.ops.len(), decoded.ops.len());
    }

    // Setting and unsetting the same property within one UpdateEntity is
    // contradictory and must be rejected at encode time.
    #[test]
    fn test_update_entity_set_unset_overlap_rejected() {
        let edit = Edit {
            id: [1u8; 16],
            name: Cow::Borrowed(""),
            authors: vec![],
            created_at: 0,
            ops: vec![Op::UpdateEntity(UpdateEntity {
                id: [2u8; 16],
                set_properties: vec![PropertyValue {
                    property: [3u8; 16],
                    value: Value::Text {
                        value: Cow::Owned("x".to_string()),
                        language: None,
                    },
                }],
                unset_values: vec![UnsetValue {
                    property: [3u8; 16],
                    language: UnsetLanguage::English,
                }],
                context: None,
            })],
        };

        let err = encode_edit(&edit).unwrap_err();
        assert!(matches!(err, EncodeError::InvalidInput { .. }));
    }

    // A language-scoped unset only makes sense for Text values; here the
    // property is set as Int64, so the edit must be rejected.
    #[test]
    fn test_unset_language_requires_text() {
        let edit = Edit {
            id: [1u8; 16],
            name: Cow::Borrowed(""),
            authors: vec![],
            created_at: 0,
            ops: vec![Op::UpdateEntity(UpdateEntity {
                id: [2u8; 16],
                set_properties: vec![PropertyValue {
                    property: [3u8; 16],
                    value: Value::Int64 { value: 1, unit: None },
                }],
                unset_values: vec![UnsetValue {
                    property: [3u8; 16],
                    language: UnsetLanguage::English,
                }],
                context: None,
            })],
        };

        let err = encode_edit(&edit).unwrap_err();
        assert!(matches!(err, EncodeError::InvalidInput { .. }));
    }

    // Setting from_space while simultaneously listing FromSpace in `unset`
    // is contradictory and must be rejected.
    #[test]
    fn test_update_relation_set_unset_overlap_rejected() {
        let edit = Edit {
            id: [1u8; 16],
            name: Cow::Borrowed(""),
            authors: vec![],
            created_at: 0,
            ops: vec![Op::UpdateRelation(UpdateRelation {
                id: [4u8; 16],
                from_space: Some([5u8; 16]),
                from_version: None,
                to_space: None,
                to_version: None,
                position: None,
                unset: vec![UnsetRelationField::FromSpace],
                context: None,
            })],
        };

        let err = encode_edit(&edit).unwrap_err();
        assert!(matches!(err, EncodeError::InvalidInput { .. }));
    }

    // The same property may not carry two different data types across ops
    // (Text in the create, Int64 in the update).
    #[test]
    fn test_property_type_mismatch_rejected() {
        let edit = Edit {
            id: [1u8; 16],
            name: Cow::Borrowed(""),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [2u8; 16],
                    values: vec![PropertyValue {
                        property: [3u8; 16],
                        value: Value::Text {
                            value: Cow::Owned("x".to_string()),
                            language: None,
                        },
                    }],
                    context: None,
                }),
                Op::UpdateEntity(UpdateEntity {
                    id: [2u8; 16],
                    set_properties: vec![PropertyValue {
                        property: [3u8; 16],
                        value: Value::Int64 { value: 1, unit: None },
                    }],
                    unset_values: vec![],
                    context: None,
                }),
            ],
        };

        let err = encode_edit(&edit).unwrap_err();
        assert!(matches!(err, EncodeError::InvalidInput { .. }));
    }

    // Deleting an entity and then re-creating the same id within one edit
    // must be rejected.
    #[test]
    fn test_delete_then_create_entity_rejected() {
        let edit = Edit {
            id: [1u8; 16],
            name: Cow::Borrowed(""),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::DeleteEntity(DeleteEntity {
                    id: [2u8; 16],
                    context: None,
                }),
                Op::CreateEntity(CreateEntity {
                    id: [2u8; 16],
                    values: vec![],
                    context: None,
                }),
            ],
        };

        let err = encode_edit(&edit).unwrap_err();
        assert!(matches!(err, EncodeError::InvalidInput { .. }));
    }

    // Same delete-then-create rule for relations.
    #[test]
    fn test_delete_then_create_relation_rejected() {
        let edit = Edit {
            id: [1u8; 16],
            name: Cow::Borrowed(""),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::DeleteRelation(DeleteRelation {
                    id: [4u8; 16],
                    context: None,
                }),
                Op::CreateRelation(CreateRelation {
                    id: [4u8; 16],
                    relation_type: [5u8; 16],
                    from: [6u8; 16],
                    from_is_value_ref: false,
                    from_space: None,
                    from_version: None,
                    to: [7u8; 16],
                    to_is_value_ref: false,
                    to_space: None,
                    to_version: None,
                    entity: None,
                    position: None,
                    context: None,
                }),
            ],
        };

        let err = encode_edit(&edit).unwrap_err();
        assert!(matches!(err, EncodeError::InvalidInput { .. }));
    }

    // A relation's backing entity id must differ from the relation's own id.
    #[test]
    fn test_create_relation_entity_equals_id_rejected() {
        let edit = Edit {
            id: [1u8; 16],
            name: Cow::Borrowed(""),
            authors: vec![],
            created_at: 0,
            ops: vec![Op::CreateRelation(CreateRelation {
                id: [4u8; 16],
                relation_type: [5u8; 16],
                from: [6u8; 16],
                from_is_value_ref: false,
                from_space: None,
                from_version: None,
                to: [7u8; 16],
                to_is_value_ref: false,
                to_space: None,
                to_version: None,
                entity: Some([4u8; 16]),
                position: None,
                context: None,
            })],
        };

        let err = encode_edit(&edit).unwrap_err();
        assert!(matches!(err, EncodeError::InvalidInput { .. }));
    }

    // A value-ref may only specify a language when the referenced property
    // value is Text; here it is Int64, so encoding must fail.
    #[test]
    fn test_create_value_ref_language_requires_text() {
        let edit = Edit {
            id: [1u8; 16],
            name: Cow::Borrowed(""),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [2u8; 16],
                    values: vec![PropertyValue {
                        property: [3u8; 16],
                        value: Value::Int64 { value: 1, unit: None },
                    }],
                    context: None,
                }),
                Op::CreateValueRef(CreateValueRef {
                    id: [8u8; 16],
                    entity: [2u8; 16],
                    property: [3u8; 16],
                    language: Some([9u8; 16]),
                    space: None,
                }),
            ],
        };

        let err = encode_edit(&edit).unwrap_err();
        assert!(matches!(err, EncodeError::InvalidInput { .. }));
    }

    // White-box test: walks the encoded header and dictionary sections by
    // hand to verify that a value-ref id used as a relation endpoint is NOT
    // added to the objects dictionary (only real entity ids are).
    #[test]
    fn test_value_ref_endpoints_not_in_object_ids() {
        let edit = Edit {
            id: [1u8; 16],
            name: Cow::Borrowed(""),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::CreateValueRef(CreateValueRef {
                    id: [10u8; 16],
                    entity: [2u8; 16],
                    property: [3u8; 16],
                    language: None,
                    space: None,
                }),
                Op::CreateRelation(CreateRelation {
                    id: [4u8; 16],
                    relation_type: [5u8; 16],
                    from: [10u8; 16],
                    from_is_value_ref: true,
                    from_space: None,
                    from_version: None,
                    to: [2u8; 16],
                    to_is_value_ref: false,
                    to_space: None,
                    to_version: None,
                    entity: None,
                    position: None,
                    context: None,
                }),
            ],
        };

        let encoded = encode_edit(&edit).unwrap();
        // Skip the fixed header fields in wire order to reach the dictionaries.
        let mut reader = Reader::new(&encoded);
        reader.read_bytes(4, "magic").unwrap();
        reader.read_byte("version").unwrap();
        reader.read_id("edit_id").unwrap();
        reader.read_string(MAX_STRING_LEN, "name").unwrap();
        reader.read_id_vec(MAX_AUTHORS, "authors").unwrap();
        reader.read_signed_varint("created_at").unwrap();
        // Property dictionary entries are (id, data_type) pairs.
        let property_count = reader.read_varint("property_count").unwrap() as usize;
        for _ in 0..property_count {
            reader.read_id("property_id").unwrap();
            reader.read_byte("data_type").unwrap();
        }
        let _relation_types = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "relation_types").unwrap();
        let _languages = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "languages").unwrap();
        let _units = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "units").unwrap();
        let objects = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "objects").unwrap();
        let _context_ids = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "context_ids").unwrap();

        // The value-ref endpoint [10; 16] must not appear; the entity [2; 16] must.
        assert!(!objects.contains(&[10u8; 16]));
        assert!(objects.contains(&[2u8; 16]));
    }

    // Canonical mode rejects duplicate (property, language) unset entries.
    #[test]
    fn test_canonical_rejects_duplicate_unset() {
        let edit = Edit {
            id: [1u8; 16],
            name: Cow::Borrowed(""),
            authors: vec![],
            created_at: 0,
            ops: vec![Op::UpdateEntity(UpdateEntity {
                id: [2u8; 16],
                set_properties: vec![],
                unset_values: vec![
                    UnsetValue {
                        property: [3u8; 16],
                        language: UnsetLanguage::English,
                    },
                    UnsetValue {
                        property: [3u8; 16],
                        language: UnsetLanguage::English,
                    },
                ],
                context: None,
            })],
        };

        let err = encode_edit_with_options(&edit, EncodeOptions::canonical()).unwrap_err();
        assert!(matches!(err, EncodeError::DuplicateUnset { .. }));
    }

    // Canonical mode also rejects a repeated field in UpdateRelation.unset.
    // NOTE(review): this asserts InvalidInput while the UpdateEntity variant
    // above asserts DuplicateUnset — presumably intentional; confirm the
    // encoder maps relation-field duplicates to InvalidInput.
    #[test]
    fn test_canonical_rejects_duplicate_update_relation_unset_fields() {
        let edit = Edit {
            id: [1u8; 16],
            name: Cow::Borrowed(""),
            authors: vec![],
            created_at: 0,
            ops: vec![Op::UpdateRelation(UpdateRelation {
                id: [4u8; 16],
                from_space: None,
                from_version: None,
                to_space: None,
                to_version: None,
                position: None,
                unset: vec![UnsetRelationField::FromSpace, UnsetRelationField::FromSpace],
                context: None,
            })],
        };

        let err = encode_edit_with_options(&edit, EncodeOptions::canonical()).unwrap_err();
        assert!(matches!(err, EncodeError::InvalidInput { .. }));
    }

    // Pins the on-wire magic bytes for both framings.
    #[test]
    fn test_compression_magic() {
        let edit = make_test_edit();

        let uncompressed = encode_edit(&edit).unwrap();
        let compressed = encode_edit_compressed(&edit, 3).unwrap();

        assert_eq!(&uncompressed[0..4], b"GRC2");
        assert_eq!(&compressed[0..5], b"GRC2Z");
    }

    // Unknown magic bytes produce InvalidMagic.
    #[test]
    fn test_invalid_magic() {
        let data = b"XXXX";
        let result = decode_edit(data);
        assert!(matches!(result, Err(DecodeError::InvalidMagic { .. })));
    }

    // A valid magic followed by an out-of-range version byte is rejected
    // with UnsupportedVersion; trailing zero padding keeps the input long
    // enough to reach the version check.
    #[test]
    fn test_unsupported_version() {
        let mut data = Vec::new();
        data.extend_from_slice(MAGIC_UNCOMPRESSED);
        data.push(99);
        data.extend_from_slice(&[0u8; 100]);

        let result = decode_edit(&data);
        assert!(matches!(result, Err(DecodeError::UnsupportedVersion { version: 99 })));
    }

    // An edit with no authors, no name, and no ops roundtrips cleanly.
    #[test]
    fn test_empty_edit() {
        let edit: Edit<'static> = Edit {
            id: [0u8; 16],
            name: Cow::Borrowed(""),
            authors: vec![],
            created_at: 0,
            ops: vec![],
        };

        let encoded = encode_edit(&edit).unwrap();
        let decoded = decode_edit(&encoded).unwrap();

        assert_eq!(edit.id, decoded.id);
        assert!(decoded.name.is_empty());
        assert!(decoded.authors.is_empty());
        assert!(decoded.ops.is_empty());
    }

    // Two edits that differ only in the order of a CreateEntity's values
    // must produce identical canonical output prefixes (the header and
    // dictionary sections), while still decoding correctly.
    #[test]
    fn test_canonical_encoding_deterministic() {
        let prop_a = [0x0A; 16];
        let prop_b = [0x0B; 16];
        let edit1: Edit<'static> = Edit {
            id: [1u8; 16],
            name: Cow::Owned("Test".to_string()),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [3u8; 16],
                    values: vec![
                        PropertyValue {
                            property: prop_a,
                            value: Value::Text {
                                value: Cow::Owned("Hello".to_string()),
                                language: None,
                            },
                        },
                        PropertyValue {
                            property: prop_b,
                            value: Value::Int64 { value: 42, unit: None },
                        },
                    ],
                    context: None,
                }),
            ],
        };

        // Same edit with the two values listed in the opposite order.
        let edit2: Edit<'static> = Edit {
            id: [1u8; 16],
            name: Cow::Owned("Test".to_string()),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [3u8; 16],
                    values: vec![
                        PropertyValue {
                            property: prop_b,
                            value: Value::Int64 { value: 42, unit: None },
                        },
                        PropertyValue {
                            property: prop_a,
                            value: Value::Text {
                                value: Cow::Owned("Hello".to_string()),
                                language: None,
                            },
                        },
                    ],
                    context: None,
                }),
            ],
        };

        let fast1 = encode_edit_with_options(&edit1, EncodeOptions::new()).unwrap();
        let fast2 = encode_edit_with_options(&edit2, EncodeOptions::new()).unwrap();
        let canonical1 = encode_edit_with_options(&edit1, EncodeOptions::canonical()).unwrap();
        let canonical2 = encode_edit_with_options(&edit2, EncodeOptions::canonical()).unwrap();

        let decoded1 = decode_edit(&canonical1).unwrap();
        let decoded2 = decode_edit(&canonical2).unwrap();
        assert_eq!(decoded1.id, edit1.id);
        assert_eq!(decoded2.id, edit2.id);

        // Only the first 50 bytes (header + dictionaries) are compared here.
        assert_eq!(
            &canonical1[..50], &canonical2[..50],
            "Canonical encoding should produce identical dictionary bytes"
        );

        // Fast-mode outputs are intentionally unchecked (order-dependent).
        let _ = fast1;
        let _ = fast2;
    }

    // Canonical encoding still decodes back to the same edit.
    #[test]
    fn test_canonical_encoding_roundtrip() {
        let edit = make_test_edit();

        let encoded = encode_edit_with_options(&edit, EncodeOptions::canonical()).unwrap();
        let decoded = decode_edit(&encoded).unwrap();

        assert_eq!(edit.id, decoded.id);
        assert_eq!(edit.name, decoded.name);
        assert_eq!(edit.authors, decoded.authors);
        assert_eq!(edit.created_at, decoded.created_at);
        assert_eq!(edit.ops.len(), decoded.ops.len());
    }

    // Canonical options compose with the compressed framing.
    #[test]
    fn test_canonical_encoding_compressed() {
        let edit = make_test_edit();

        let encoded = encode_edit_compressed_with_options(&edit, 3, EncodeOptions::canonical()).unwrap();
        let decoded = decode_edit(&encoded).unwrap();

        assert_eq!(edit.id, decoded.id);
        assert_eq!(edit.name, decoded.name);
    }

    // Duplicate authors pass fast mode but are rejected in canonical mode.
    #[test]
    fn test_canonical_rejects_duplicate_authors() {
        let author1 = [1u8; 16];

        let edit: Edit<'static> = Edit {
            id: [0u8; 16],
            name: Cow::Owned("Test".to_string()),
            authors: vec![author1, author1],
            created_at: 0,
            ops: vec![],
        };

        let result = encode_edit_with_options(&edit, EncodeOptions::new());
        assert!(result.is_ok());

        let result = encode_edit_with_options(&edit, EncodeOptions::canonical());
        assert!(matches!(result, Err(EncodeError::DuplicateAuthor { .. })));
    }

    // Two values for the same property with the same (absent) language are
    // duplicates under canonical rules.
    #[test]
    fn test_canonical_rejects_duplicate_values() {
        let prop = [10u8; 16];

        let edit: Edit<'static> = Edit {
            id: [0u8; 16],
            name: Cow::Owned("Test".to_string()),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [1u8; 16],
                    values: vec![
                        PropertyValue {
                            property: prop,
                            value: Value::Text {
                                value: Cow::Owned("First".to_string()),
                                language: None,
                            },
                        },
                        PropertyValue {
                            property: prop,
                            value: Value::Text {
                                value: Cow::Owned("Second".to_string()),
                                language: None,
                            },
                        },
                    ],
                    context: None,
                }),
            ],
        };

        let result = encode_edit_with_options(&edit, EncodeOptions::canonical());
        assert!(matches!(result, Err(EncodeError::DuplicateValue { .. })));
    }

    // Same property with two *different* languages is not a duplicate.
    #[test]
    fn test_canonical_allows_different_languages() {
        let prop = [10u8; 16];
        let lang_en = [20u8; 16];
        let lang_es = [21u8; 16];

        let edit: Edit<'static> = Edit {
            id: [0u8; 16],
            name: Cow::Owned("Test".to_string()),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [1u8; 16],
                    values: vec![
                        PropertyValue {
                            property: prop,
                            value: Value::Text {
                                value: Cow::Owned("Hello".to_string()),
                                language: Some(lang_en),
                            },
                        },
                        PropertyValue {
                            property: prop,
                            value: Value::Text {
                                value: Cow::Owned("Hola".to_string()),
                                language: Some(lang_es),
                            },
                        },
                    ],
                    context: None,
                }),
            ],
        };

        let result = encode_edit_with_options(&edit, EncodeOptions::canonical());
        assert!(result.is_ok());
    }

    // Canonical encoding of the same edit twice is byte-identical.
    #[test]
    fn test_canonical_sorts_values_deterministically() {
        let prop_a = [0x0A; 16];
        let prop_b = [0x0B; 16];

        let edit: Edit<'static> = Edit {
            id: [1u8; 16],
            name: Cow::Owned("Test".to_string()),
            authors: vec![],
            created_at: 0,
            ops: vec![
                Op::CreateEntity(CreateEntity {
                    id: [3u8; 16],
                    values: vec![
                        PropertyValue {
                            property: prop_b,
                            value: Value::Int64 { value: 42, unit: None },
                        },
                        PropertyValue {
                            property: prop_a,
                            value: Value::Text {
                                value: Cow::Owned("Hello".to_string()),
                                language: None,
                            },
                        },
                    ],
                    context: None,
                }),
            ],
        };

        let encoded1 = encode_edit_with_options(&edit, EncodeOptions::canonical()).unwrap();
        let encoded2 = encode_edit_with_options(&edit, EncodeOptions::canonical()).unwrap();
        assert_eq!(encoded1, encoded2, "Canonical encoding should be deterministic");

        let decoded = decode_edit(&encoded1).unwrap();
        assert_eq!(decoded.ops.len(), 1);
    }
}