Skip to main content

grc_20/codec/
edit.rs

//! Edit encoding/decoding for the GRC-20 binary format.
//!
//! Implements the wire format for edits (spec Section 6.3).
4
5use std::borrow::Cow;
6use std::io::Read;
7
8use rustc_hash::{FxHashMap, FxHashSet};
9
10use crate::codec::op::{decode_op, encode_op};
11use crate::codec::primitives::{Reader, Writer};
12use crate::error::{DecodeError, EncodeError};
13use crate::limits::{
14    FORMAT_VERSION, MAGIC_COMPRESSED, MAGIC_UNCOMPRESSED, MAX_AUTHORS, MAX_DICT_SIZE,
15    MAX_EDIT_SIZE, MAX_OPS_PER_EDIT, MAX_STRING_LEN, MIN_FORMAT_VERSION,
16};
17use crate::model::{
18    Context, ContextEdge, DataType, DictionaryBuilder, Edit, Id, Op, UnsetLanguage,
19    UnsetRelationField, WireDictionaries,
20};
21
// =============================================================================
// DECODING
// =============================================================================
25
26/// Decompresses a GRC2Z compressed edit, returning the uncompressed bytes.
27///
28/// Use this with [`decode_edit`] for zero-copy decoding of compressed data:
29///
30/// ```ignore
31/// let uncompressed = decompress(&compressed_bytes)?;
32/// let edit = decode_edit(&uncompressed)?;  // zero-copy, borrows from uncompressed
33/// // edit is valid while uncompressed is alive
34/// ```
35pub fn decompress(input: &[u8]) -> Result<Vec<u8>, DecodeError> {
36    if input.len() < 5 {
37        return Err(DecodeError::UnexpectedEof { context: "magic" });
38    }
39    if &input[0..5] != MAGIC_COMPRESSED {
40        let mut found = [0u8; 4];
41        found.copy_from_slice(&input[0..4]);
42        return Err(DecodeError::InvalidMagic { found });
43    }
44    decompress_zstd(&input[5..])
45}
46
47/// Decodes an Edit from binary data with zero-copy borrowing.
48///
49/// Handles both compressed (GRC2Z) and uncompressed (GRC2) formats.
50/// For true zero-copy with compressed data, use [`decompress`] first:
51///
52/// ```ignore
53/// // Zero-copy for compressed data:
54/// let uncompressed = decompress(&compressed)?;
55/// let edit = decode_edit(&uncompressed)?;
56///
57/// // Zero-copy for uncompressed data:
58/// let edit = decode_edit(&uncompressed_bytes)?;
59/// ```
60///
61/// If you pass compressed data directly, it will decompress internally
62/// and allocate owned strings (no zero-copy benefit).
63pub fn decode_edit(input: &[u8]) -> Result<Edit<'_>, DecodeError> {
64    if input.len() < 4 {
65        return Err(DecodeError::UnexpectedEof { context: "magic" });
66    }
67
68    // Detect compression
69    if input.len() >= 5 && &input[0..5] == MAGIC_COMPRESSED {
70        // Compressed: decompress and decode with allocations
71        // (for zero-copy, caller should use decompress() first)
72        let decompressed = decompress_zstd(&input[5..])?;
73        if decompressed.len() > MAX_EDIT_SIZE {
74            return Err(DecodeError::LengthExceedsLimit {
75                field: "edit",
76                len: decompressed.len(),
77                max: MAX_EDIT_SIZE,
78            });
79        }
80        decode_edit_owned(&decompressed)
81    } else if &input[0..4] == MAGIC_UNCOMPRESSED {
82        // Uncompressed: decode with zero-copy borrowing
83        if input.len() > MAX_EDIT_SIZE {
84            return Err(DecodeError::LengthExceedsLimit {
85                field: "edit",
86                len: input.len(),
87                max: MAX_EDIT_SIZE,
88            });
89        }
90        decode_edit_borrowed(input)
91    } else {
92        let mut found = [0u8; 4];
93        found.copy_from_slice(&input[0..4]);
94        Err(DecodeError::InvalidMagic { found })
95    }
96}
97
98/// Decodes an Edit with zero-copy borrowing from the input.
99fn decode_edit_borrowed(input: &[u8]) -> Result<Edit<'_>, DecodeError> {
100    let mut reader = Reader::new(input);
101
102    // Skip magic (already validated)
103    reader.read_bytes(4, "magic")?;
104
105    // Version
106    let version = reader.read_byte("version")?;
107    if version < MIN_FORMAT_VERSION || version > FORMAT_VERSION {
108        return Err(DecodeError::UnsupportedVersion { version });
109    }
110
111    // Header
112    let edit_id = reader.read_id("edit_id")?;
113    let name = Cow::Borrowed(reader.read_str(MAX_STRING_LEN, "name")?);
114    let authors = reader.read_id_vec(MAX_AUTHORS, "authors")?;
115    let created_at = reader.read_signed_varint("created_at")?;
116
117    // Schema dictionaries (with duplicate detection)
118    let property_count = reader.read_varint("property_count")? as usize;
119    if property_count > MAX_DICT_SIZE {
120        return Err(DecodeError::LengthExceedsLimit {
121            field: "properties",
122            len: property_count,
123            max: MAX_DICT_SIZE,
124        });
125    }
126    let mut properties = Vec::with_capacity(property_count);
127    let mut seen_props = FxHashSet::with_capacity_and_hasher(property_count, Default::default());
128    for _ in 0..property_count {
129        let id = reader.read_id("property_id")?;
130        if !seen_props.insert(id) {
131            return Err(DecodeError::DuplicateDictionaryEntry { dict: "properties", id });
132        }
133        let dt_byte = reader.read_byte("data_type")?;
134        let data_type = DataType::from_u8(dt_byte)
135            .ok_or(DecodeError::InvalidDataType { data_type: dt_byte })?;
136        properties.push((id, data_type));
137    }
138
139    let relation_types = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "relation_types")?;
140    let languages = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "languages")?;
141    let units = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "units")?;
142    let objects = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "objects")?;
143    let context_ids = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "context_ids")?;
144
145    let mut dicts = WireDictionaries {
146        properties,
147        relation_types,
148        languages,
149        units,
150        objects,
151        context_ids,
152        contexts: Vec::new(),
153    };
154
155    // Contexts - decode and store in dicts for op decoding to resolve
156    let context_count = reader.read_varint("context_count")? as usize;
157    if context_count > MAX_DICT_SIZE {
158        return Err(DecodeError::LengthExceedsLimit {
159            field: "contexts",
160            len: context_count,
161            max: MAX_DICT_SIZE,
162        });
163    }
164    for _ in 0..context_count {
165        dicts.contexts.push(decode_context(&mut reader, &dicts)?);
166    }
167
168    // Operations
169    let op_count = reader.read_varint("op_count")? as usize;
170    if op_count > MAX_OPS_PER_EDIT {
171        return Err(DecodeError::LengthExceedsLimit {
172            field: "ops",
173            len: op_count,
174            max: MAX_OPS_PER_EDIT,
175        });
176    }
177
178    let mut ops = Vec::with_capacity(op_count);
179    for _ in 0..op_count {
180        ops.push(decode_op(&mut reader, &dicts)?);
181    }
182
183    Ok(Edit {
184        id: edit_id,
185        name,
186        authors,
187        created_at,
188        ops,
189    })
190}
191
192/// Decodes an Edit with allocations (for decompressed data).
193fn decode_edit_owned(data: &[u8]) -> Result<Edit<'static>, DecodeError> {
194    let mut reader = Reader::new(data);
195
196    // Skip magic (already validated in decompress)
197    reader.read_bytes(4, "magic")?;
198
199    // Version
200    let version = reader.read_byte("version")?;
201    if version < MIN_FORMAT_VERSION || version > FORMAT_VERSION {
202        return Err(DecodeError::UnsupportedVersion { version });
203    }
204
205    // Header - use allocating reads
206    let edit_id = reader.read_id("edit_id")?;
207    let name = Cow::Owned(reader.read_string(MAX_STRING_LEN, "name")?);
208    let authors = reader.read_id_vec(MAX_AUTHORS, "authors")?;
209    let created_at = reader.read_signed_varint("created_at")?;
210
211    // Schema dictionaries (with duplicate detection)
212    let property_count = reader.read_varint("property_count")? as usize;
213    if property_count > MAX_DICT_SIZE {
214        return Err(DecodeError::LengthExceedsLimit {
215            field: "properties",
216            len: property_count,
217            max: MAX_DICT_SIZE,
218        });
219    }
220    let mut properties = Vec::with_capacity(property_count);
221    let mut seen_props = FxHashSet::with_capacity_and_hasher(property_count, Default::default());
222    for _ in 0..property_count {
223        let id = reader.read_id("property_id")?;
224        if !seen_props.insert(id) {
225            return Err(DecodeError::DuplicateDictionaryEntry { dict: "properties", id });
226        }
227        let dt_byte = reader.read_byte("data_type")?;
228        let data_type = DataType::from_u8(dt_byte)
229            .ok_or(DecodeError::InvalidDataType { data_type: dt_byte })?;
230        properties.push((id, data_type));
231    }
232
233    let relation_types = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "relation_types")?;
234    let languages = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "languages")?;
235    let units = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "units")?;
236    let objects = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "objects")?;
237    let context_ids = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "context_ids")?;
238
239    let mut dicts = WireDictionaries {
240        properties,
241        relation_types,
242        languages,
243        units,
244        objects,
245        context_ids,
246        contexts: Vec::new(),
247    };
248
249    // Contexts - decode and store in dicts for op decoding to resolve
250    let context_count = reader.read_varint("context_count")? as usize;
251    if context_count > MAX_DICT_SIZE {
252        return Err(DecodeError::LengthExceedsLimit {
253            field: "contexts",
254            len: context_count,
255            max: MAX_DICT_SIZE,
256        });
257    }
258    for _ in 0..context_count {
259        dicts.contexts.push(decode_context(&mut reader, &dicts)?);
260    }
261
262    // Operations - use allocating decode
263    let op_count = reader.read_varint("op_count")? as usize;
264    if op_count > MAX_OPS_PER_EDIT {
265        return Err(DecodeError::LengthExceedsLimit {
266            field: "ops",
267            len: op_count,
268            max: MAX_OPS_PER_EDIT,
269        });
270    }
271
272    let mut ops = Vec::with_capacity(op_count);
273    for _ in 0..op_count {
274        ops.push(decode_op_owned(&mut reader, &dicts)?);
275    }
276
277    Ok(Edit {
278        id: edit_id,
279        name,
280        authors,
281        created_at,
282        ops,
283    })
284}
285
286/// Decodes an Op with allocations (for decompressed data).
287fn decode_op_owned(reader: &mut Reader<'_>, dicts: &WireDictionaries) -> Result<Op<'static>, DecodeError> {
288    // Decode normally, then convert to owned
289    let op = decode_op(reader, dicts)?;
290    Ok(op_to_owned(op))
291}
292
293/// Decodes a Context from the reader.
294fn decode_context(reader: &mut Reader<'_>, dicts: &WireDictionaries) -> Result<Context, DecodeError> {
295    let root_id_index = reader.read_varint("root_id")? as usize;
296    if root_id_index >= dicts.context_ids.len() {
297        return Err(DecodeError::IndexOutOfBounds {
298            dict: "context_ids",
299            index: root_id_index,
300            size: dicts.context_ids.len(),
301        });
302    }
303    let root_id = dicts.context_ids[root_id_index];
304
305    let edge_count = reader.read_varint("edge_count")? as usize;
306    if edge_count > MAX_DICT_SIZE {
307        return Err(DecodeError::LengthExceedsLimit {
308            field: "context_edges",
309            len: edge_count,
310            max: MAX_DICT_SIZE,
311        });
312    }
313
314    let mut edges = Vec::with_capacity(edge_count);
315    for _ in 0..edge_count {
316        let type_id_index = reader.read_varint("edge_type_id")? as usize;
317        if type_id_index >= dicts.relation_types.len() {
318            return Err(DecodeError::IndexOutOfBounds {
319                dict: "relation_types",
320                index: type_id_index,
321                size: dicts.relation_types.len(),
322            });
323        }
324        let type_id = dicts.relation_types[type_id_index];
325
326        let to_entity_id_index = reader.read_varint("edge_to_entity_id")? as usize;
327        if to_entity_id_index >= dicts.context_ids.len() {
328            return Err(DecodeError::IndexOutOfBounds {
329                dict: "context_ids",
330                index: to_entity_id_index,
331                size: dicts.context_ids.len(),
332            });
333        }
334        let to_entity_id = dicts.context_ids[to_entity_id_index];
335
336        edges.push(ContextEdge { type_id, to_entity_id });
337    }
338
339    Ok(Context { root_id, edges })
340}
341
342/// Converts an Op with borrowed data to owned data.
343fn op_to_owned(op: Op<'_>) -> Op<'static> {
344    match op {
345        Op::CreateEntity(ce) => Op::CreateEntity(crate::model::CreateEntity {
346            id: ce.id,
347            values: ce.values.into_iter().map(pv_to_owned).collect(),
348            context: ce.context,
349        }),
350        Op::UpdateEntity(ue) => Op::UpdateEntity(crate::model::UpdateEntity {
351            id: ue.id,
352            set_properties: ue.set_properties.into_iter().map(pv_to_owned).collect(),
353            unset_values: ue.unset_values,
354            context: ue.context,
355        }),
356        Op::DeleteEntity(de) => Op::DeleteEntity(de),
357        Op::RestoreEntity(re) => Op::RestoreEntity(re),
358        Op::CreateRelation(cr) => Op::CreateRelation(crate::model::CreateRelation {
359            id: cr.id,
360            relation_type: cr.relation_type,
361            from: cr.from,
362            from_is_value_ref: cr.from_is_value_ref,
363            to: cr.to,
364            to_is_value_ref: cr.to_is_value_ref,
365            entity: cr.entity,
366            position: cr.position.map(|p| Cow::Owned(p.into_owned())),
367            from_space: cr.from_space,
368            from_version: cr.from_version,
369            to_space: cr.to_space,
370            to_version: cr.to_version,
371            context: cr.context,
372        }),
373        Op::UpdateRelation(ur) => Op::UpdateRelation(crate::model::UpdateRelation {
374            id: ur.id,
375            from_space: ur.from_space,
376            from_version: ur.from_version,
377            to_space: ur.to_space,
378            to_version: ur.to_version,
379            position: ur.position.map(|p| Cow::Owned(p.into_owned())),
380            unset: ur.unset,
381            context: ur.context,
382        }),
383        Op::DeleteRelation(dr) => Op::DeleteRelation(dr),
384        Op::RestoreRelation(rr) => Op::RestoreRelation(rr),
385        Op::CreateValueRef(cvr) => Op::CreateValueRef(cvr),
386    }
387}
388
389/// Converts a PropertyValue with borrowed data to owned data.
390fn pv_to_owned(pv: crate::model::PropertyValue<'_>) -> crate::model::PropertyValue<'static> {
391    crate::model::PropertyValue {
392        property: pv.property,
393        value: value_to_owned(pv.value),
394    }
395}
396
397/// Converts a Value with borrowed data to owned data.
398fn value_to_owned(v: crate::model::Value<'_>) -> crate::model::Value<'static> {
399    use crate::model::{DecimalMantissa, Value};
400    match v {
401        Value::Bool(b) => Value::Bool(b),
402        Value::Int64 { value, unit } => Value::Int64 { value, unit },
403        Value::Float64 { value, unit } => Value::Float64 { value, unit },
404        Value::Decimal { exponent, mantissa, unit } => Value::Decimal {
405            exponent,
406            mantissa: match mantissa {
407                DecimalMantissa::I64(i) => DecimalMantissa::I64(i),
408                DecimalMantissa::Big(b) => DecimalMantissa::Big(Cow::Owned(b.into_owned())),
409            },
410            unit,
411        },
412        Value::Text { value, language } => Value::Text {
413            value: Cow::Owned(value.into_owned()),
414            language,
415        },
416        Value::Bytes(b) => Value::Bytes(Cow::Owned(b.into_owned())),
417        Value::Date(s) => Value::Date(Cow::Owned(s.into_owned())),
418        Value::Time(s) => Value::Time(Cow::Owned(s.into_owned())),
419        Value::Datetime(s) => Value::Datetime(Cow::Owned(s.into_owned())),
420        Value::Schedule(s) => Value::Schedule(Cow::Owned(s.into_owned())),
421        Value::Point { lat, lon, alt } => Value::Point { lat, lon, alt },
422        Value::Rect { min_lat, min_lon, max_lat, max_lon } => Value::Rect { min_lat, min_lon, max_lat, max_lon },
423        Value::Embedding { sub_type, dims, data } => Value::Embedding {
424            sub_type,
425            dims,
426            data: Cow::Owned(data.into_owned()),
427        },
428    }
429}
430
431/// Reads an ID vector and checks for duplicates.
432fn read_id_vec_no_duplicates(
433    reader: &mut Reader<'_>,
434    max_len: usize,
435    field: &'static str,
436) -> Result<Vec<Id>, DecodeError> {
437    let count = reader.read_varint(field)? as usize;
438    if count > max_len {
439        return Err(DecodeError::LengthExceedsLimit {
440            field,
441            len: count,
442            max: max_len,
443        });
444    }
445
446    let mut ids = Vec::with_capacity(count);
447    let mut seen = FxHashSet::with_capacity_and_hasher(count, Default::default());
448
449    for _ in 0..count {
450        let id = reader.read_id(field)?;
451        if !seen.insert(id) {
452            return Err(DecodeError::DuplicateDictionaryEntry { dict: field, id });
453        }
454        ids.push(id);
455    }
456
457    Ok(ids)
458}
459
460fn decompress_zstd(compressed: &[u8]) -> Result<Vec<u8>, DecodeError> {
461    // Read uncompressed size
462    let mut reader = Reader::new(compressed);
463    let declared_size = reader.read_varint("uncompressed_size")? as usize;
464
465    if declared_size > MAX_EDIT_SIZE {
466        return Err(DecodeError::LengthExceedsLimit {
467            field: "uncompressed_size",
468            len: declared_size,
469            max: MAX_EDIT_SIZE,
470        });
471    }
472
473    let compressed_data = reader.remaining();
474
475    let mut decoder = zstd::Decoder::new(compressed_data)
476        .map_err(|e| DecodeError::DecompressionFailed(e.to_string()))?;
477
478    let mut decompressed = Vec::with_capacity(declared_size);
479    decoder
480        .read_to_end(&mut decompressed)
481        .map_err(|e| DecodeError::DecompressionFailed(e.to_string()))?;
482
483    if decompressed.len() != declared_size {
484        return Err(DecodeError::UncompressedSizeMismatch {
485            declared: declared_size,
486            actual: decompressed.len(),
487        });
488    }
489
490    Ok(decompressed)
491}
492
// =============================================================================
// ENCODING
// =============================================================================
496
/// Knobs that control how an edit is serialized.
#[derive(Debug, Clone, Copy, Default)]
pub struct EncodeOptions {
    /// Selects canonical encoding.
    ///
    /// In canonical mode dictionary entries are sorted by ID bytes
    /// (lexicographically), which makes the output deterministic for the same
    /// logical edit.
    ///
    /// Turn it on when you need to:
    /// - compute content hashes for deduplication,
    /// - sign edit content, or
    /// - reproduce identical bytes across implementations.
    ///
    /// Note: canonical mode needs two passes over the ops and is therefore
    /// slower than the non-canonical encoder.
    pub canonical: bool,
}

impl EncodeOptions {
    /// Returns the default options: canonical mode switched off.
    pub fn new() -> Self {
        Self { canonical: false }
    }

    /// Returns options with canonical mode switched on.
    pub fn canonical() -> Self {
        Self { canonical: true }
    }
}
527
528fn validate_context_limits(context: &Context) -> Result<(), EncodeError> {
529    if context.edges.len() > MAX_DICT_SIZE {
530        return Err(EncodeError::LengthExceedsLimit {
531            field: "context_edges",
532            len: context.edges.len(),
533            max: MAX_DICT_SIZE,
534        });
535    }
536    Ok(())
537}
538
539/// Encode-time structural validation aligned with spec.md:
540/// - Section 4.3: size limits and dictionary constraints
541/// - Section 4.4: canonical duplicate rules (enforced elsewhere) and author checks
542/// - Section 4.5 / 6.3: context structure and ContextRef requirements
543/// - Section 6.4: op type constraints
544/// - Section 3.2 / 3.6: update set/unset overlap and TEXT-only language slots
545fn validate_edit_inputs(edit: &Edit) -> Result<(), EncodeError> {
546    let name_len = edit.name.as_bytes().len();
547    if name_len > MAX_STRING_LEN {
548        return Err(EncodeError::LengthExceedsLimit {
549            field: "name",
550            len: name_len,
551            max: MAX_STRING_LEN,
552        });
553    }
554    if edit.authors.len() > MAX_AUTHORS {
555        return Err(EncodeError::LengthExceedsLimit {
556            field: "authors",
557            len: edit.authors.len(),
558            max: MAX_AUTHORS,
559        });
560    }
561    if edit.ops.len() > MAX_OPS_PER_EDIT {
562        return Err(EncodeError::LengthExceedsLimit {
563            field: "ops",
564            len: edit.ops.len(),
565            max: MAX_OPS_PER_EDIT,
566        });
567    }
568
569    let mut property_types: FxHashMap<Id, DataType> = FxHashMap::default();
570    let mut deleted_entities: FxHashSet<Id> = FxHashSet::default();
571    let mut deleted_relations: FxHashSet<Id> = FxHashSet::default();
572
573    for op in &edit.ops {
574        match op {
575            Op::CreateEntity(ce) => {
576                if deleted_entities.contains(&ce.id) {
577                    return Err(EncodeError::InvalidInput { context: "delete-then-create entity in same edit" });
578                }
579                if ce.values.len() > crate::limits::MAX_VALUES_PER_ENTITY {
580                    return Err(EncodeError::LengthExceedsLimit {
581                        field: "values",
582                        len: ce.values.len(),
583                        max: crate::limits::MAX_VALUES_PER_ENTITY,
584                    });
585                }
586                for pv in &ce.values {
587                    let dt = pv.value.data_type();
588                    if let Some(existing) = property_types.get(&pv.property) {
589                        if *existing != dt {
590                            return Err(EncodeError::InvalidInput { context: "property type mismatch" });
591                        }
592                    } else {
593                        property_types.insert(pv.property, dt);
594                    }
595                }
596                if let Some(ctx) = &ce.context {
597                    validate_context_limits(ctx)?;
598                }
599            }
600            Op::UpdateEntity(ue) => {
601                if ue.set_properties.len() > crate::limits::MAX_VALUES_PER_ENTITY {
602                    return Err(EncodeError::LengthExceedsLimit {
603                        field: "set_properties",
604                        len: ue.set_properties.len(),
605                        max: crate::limits::MAX_VALUES_PER_ENTITY,
606                    });
607                }
608                if ue.unset_values.len() > crate::limits::MAX_VALUES_PER_ENTITY {
609                    return Err(EncodeError::LengthExceedsLimit {
610                        field: "unset_values",
611                        len: ue.unset_values.len(),
612                        max: crate::limits::MAX_VALUES_PER_ENTITY,
613                    });
614                }
615                let mut set_langs: FxHashMap<Id, FxHashSet<Option<Id>>> = FxHashMap::default();
616                for pv in &ue.set_properties {
617                    let dt = pv.value.data_type();
618                    if let Some(existing) = property_types.get(&pv.property) {
619                        if *existing != dt {
620                            return Err(EncodeError::InvalidInput { context: "property type mismatch" });
621                        }
622                    } else {
623                        property_types.insert(pv.property, dt);
624                    }
625                    let lang_key = match &pv.value {
626                        crate::model::Value::Text { language, .. } => *language,
627                        _ => None,
628                    };
629                    set_langs.entry(pv.property).or_default().insert(lang_key);
630                }
631
632                for unset in &ue.unset_values {
633                    match &unset.language {
634                        UnsetLanguage::All => {
635                            if let Some(existing) = set_langs.get(&unset.property) {
636                                if !existing.is_empty() {
637                                    return Err(EncodeError::InvalidInput { context: "update_entity set/unset overlap" });
638                                }
639                            }
640                        }
641                        UnsetLanguage::English => {
642                            if let Some(existing) = set_langs.get(&unset.property) {
643                                if existing.contains(&None) {
644                                    return Err(EncodeError::InvalidInput { context: "update_entity set/unset overlap" });
645                                }
646                            }
647                            if let Some(existing) = property_types.get(&unset.property) {
648                                if *existing != DataType::Text {
649                                    return Err(EncodeError::InvalidInput { context: "unset language requires TEXT" });
650                                }
651                            } else {
652                                property_types.insert(unset.property, DataType::Text);
653                            }
654                        }
655                        UnsetLanguage::Specific(lang_id) => {
656                            if let Some(existing) = set_langs.get(&unset.property) {
657                                if existing.contains(&Some(*lang_id)) {
658                                    return Err(EncodeError::InvalidInput { context: "update_entity set/unset overlap" });
659                                }
660                            }
661                            if let Some(existing) = property_types.get(&unset.property) {
662                                if *existing != DataType::Text {
663                                    return Err(EncodeError::InvalidInput { context: "unset language requires TEXT" });
664                                }
665                            } else {
666                                property_types.insert(unset.property, DataType::Text);
667                            }
668                        }
669                    }
670                }
671                if let Some(ctx) = &ue.context {
672                    validate_context_limits(ctx)?;
673                }
674            }
675            Op::DeleteEntity(de) => {
676                deleted_entities.insert(de.id);
677                if let Some(ctx) = &de.context {
678                    validate_context_limits(ctx)?;
679                }
680            }
681            Op::RestoreEntity(re) => {
682                if let Some(ctx) = &re.context {
683                    validate_context_limits(ctx)?;
684                }
685            }
686            Op::CreateRelation(cr) => {
687                if deleted_relations.contains(&cr.id) {
688                    return Err(EncodeError::InvalidInput { context: "delete-then-create relation in same edit" });
689                }
690                if let Some(entity) = cr.entity {
691                    if entity == cr.id {
692                        return Err(EncodeError::InvalidInput { context: "relation entity must differ from id" });
693                    }
694                }
695                if let Some(ctx) = &cr.context {
696                    validate_context_limits(ctx)?;
697                }
698            }
699            Op::UpdateRelation(ur) => {
700                let mut seen_unset: FxHashSet<UnsetRelationField> = FxHashSet::default();
701                for field in &ur.unset {
702                    if !seen_unset.insert(*field) {
703                        return Err(EncodeError::InvalidInput { context: "update_relation duplicate unset field" });
704                    }
705                }
706                if ur.unset.contains(&UnsetRelationField::FromSpace) && ur.from_space.is_some() {
707                    return Err(EncodeError::InvalidInput { context: "update_relation set/unset overlap" });
708                }
709                if ur.unset.contains(&UnsetRelationField::FromVersion) && ur.from_version.is_some() {
710                    return Err(EncodeError::InvalidInput { context: "update_relation set/unset overlap" });
711                }
712                if ur.unset.contains(&UnsetRelationField::ToSpace) && ur.to_space.is_some() {
713                    return Err(EncodeError::InvalidInput { context: "update_relation set/unset overlap" });
714                }
715                if ur.unset.contains(&UnsetRelationField::ToVersion) && ur.to_version.is_some() {
716                    return Err(EncodeError::InvalidInput { context: "update_relation set/unset overlap" });
717                }
718                if ur.unset.contains(&UnsetRelationField::Position) && ur.position.is_some() {
719                    return Err(EncodeError::InvalidInput { context: "update_relation set/unset overlap" });
720                }
721                if let Some(ctx) = &ur.context {
722                    validate_context_limits(ctx)?;
723                }
724            }
725            Op::DeleteRelation(dr) => {
726                deleted_relations.insert(dr.id);
727                if let Some(ctx) = &dr.context {
728                    validate_context_limits(ctx)?;
729                }
730            }
731            Op::RestoreRelation(rr) => {
732                if let Some(ctx) = &rr.context {
733                    validate_context_limits(ctx)?;
734                }
735            }
736            Op::CreateValueRef(cvr) => {
737                if cvr.language.is_some() {
738                    if let Some(existing) = property_types.get(&cvr.property) {
739                        if *existing != DataType::Text {
740                            return Err(EncodeError::InvalidInput { context: "create_value_ref language requires TEXT" });
741                        }
742                    } else {
743                        property_types.insert(cvr.property, DataType::Text);
744                    }
745                }
746            }
747        }
748    }
749
750    Ok(())
751}
752
753/// Encodes an Edit to binary format (uncompressed).
754///
755/// Uses single-pass encoding: ops are encoded to a buffer while building
756/// dictionaries, then the final output is assembled.
757pub fn encode_edit(edit: &Edit) -> Result<Vec<u8>, EncodeError> {
758    encode_edit_with_options(edit, EncodeOptions::default())
759}
760
761/// Encodes an Edit to binary format with the given options.
762pub fn encode_edit_with_options(edit: &Edit, options: EncodeOptions) -> Result<Vec<u8>, EncodeError> {
763    validate_edit_inputs(edit)?;
764    if options.canonical {
765        encode_edit_canonical(edit)
766    } else {
767        encode_edit_fast(edit)
768    }
769}
770
771/// Fast single-pass encoding (non-canonical).
772fn encode_edit_fast(edit: &Edit) -> Result<Vec<u8>, EncodeError> {
773    // Property types are determined from values themselves (per-edit typing)
774    let property_types = rustc_hash::FxHashMap::default();
775
776    // Create dictionary builder - contexts will be collected from ops
777    let mut dict_builder = DictionaryBuilder::with_capacity(edit.ops.len());
778
779    // Single pass: encode ops while building dictionaries (including contexts)
780    let mut ops_writer = Writer::with_capacity(edit.ops.len() * 50);
781
782    for op in &edit.ops {
783        encode_op(&mut ops_writer, op, &mut dict_builder, &property_types)?;
784    }
785    dict_builder.validate_limits()?;
786
787    // Now assemble final output: header + dictionaries + contexts + ops
788    let ops_bytes = ops_writer.into_bytes();
789    let mut writer = Writer::with_capacity(256 + ops_bytes.len());
790
791    // Magic and version
792    writer.write_bytes(MAGIC_UNCOMPRESSED);
793    writer.write_byte(FORMAT_VERSION);
794
795    // Header
796    writer.write_id(&edit.id);
797    writer.write_string(&edit.name);
798    writer.write_id_vec(&edit.authors);
799    writer.write_signed_varint(edit.created_at);
800
801    // Dictionaries
802    dict_builder.write_dictionaries(&mut writer);
803
804    // Contexts (collected from ops during encoding)
805    dict_builder.write_contexts(&mut writer);
806
807    // Operations (already encoded)
808    writer.write_varint(edit.ops.len() as u64);
809    writer.write_bytes(&ops_bytes);
810
811    Ok(writer.into_bytes())
812}
813
814/// Canonical two-pass encoding with sorted dictionaries, authors, values, and unsets.
815///
816/// Pass 1: Collect all dictionary entries
817/// Pass 2: Sort dictionaries, encode with stable indices and sorted values
818///
819/// Canonical mode requirements (spec Section 4.4):
820/// - Dictionaries sorted by ID bytes
821/// - Authors sorted by ID bytes, no duplicates
822/// - Values sorted by (propertyRef, languageRef), no duplicate (property, language)
823/// - Unset values sorted by (propertyRef, language), no duplicates
824fn encode_edit_canonical(edit: &Edit) -> Result<Vec<u8>, EncodeError> {
825    // Property types are determined from values themselves (per-edit typing)
826    let property_types = rustc_hash::FxHashMap::default();
827
828    // Create dictionary builder - contexts will be collected from ops
829    let mut dict_builder = DictionaryBuilder::with_capacity(edit.ops.len());
830
831    // Pass 1: Collect all dictionary entries (including contexts) by doing a dry run
832    let mut temp_writer = Writer::with_capacity(edit.ops.len() * 50);
833    for op in &edit.ops {
834        encode_op(&mut temp_writer, op, &mut dict_builder, &property_types)?;
835    }
836    dict_builder.validate_limits()?;
837
838    // Sort dictionaries and get sorted builder
839    let sorted_builder = dict_builder.into_sorted();
840
841    // Sort authors by ID bytes and check for duplicates
842    let mut sorted_authors = edit.authors.clone();
843    sorted_authors.sort();
844    // Check for duplicate authors
845    for i in 1..sorted_authors.len() {
846        if sorted_authors[i] == sorted_authors[i - 1] {
847            return Err(EncodeError::DuplicateAuthor { id: sorted_authors[i] });
848        }
849    }
850
851    // Pass 2: Encode ops with sorted dictionary indices and sorted values
852    let mut ops_writer = Writer::with_capacity(edit.ops.len() * 50);
853    let mut canonical_builder = sorted_builder.clone();
854    for op in &edit.ops {
855        encode_op_canonical(&mut ops_writer, op, &mut canonical_builder, &property_types)?;
856    }
857
858    // Assemble final output: header + dictionaries + contexts + ops
859    let ops_bytes = ops_writer.into_bytes();
860    let mut writer = Writer::with_capacity(256 + ops_bytes.len());
861
862    // Magic and version
863    writer.write_bytes(MAGIC_UNCOMPRESSED);
864    writer.write_byte(FORMAT_VERSION);
865
866    // Header
867    writer.write_id(&edit.id);
868    writer.write_string(&edit.name);
869    writer.write_id_vec(&sorted_authors);
870    writer.write_signed_varint(edit.created_at);
871
872    // Dictionaries (sorted)
873    sorted_builder.write_dictionaries(&mut writer);
874
875    // Contexts (collected from ops during pass 1, sorted)
876    sorted_builder.write_contexts(&mut writer);
877
878    // Operations
879    writer.write_varint(edit.ops.len() as u64);
880    writer.write_bytes(&ops_bytes);
881
882    Ok(writer.into_bytes())
883}
884
885/// Encodes an op in canonical mode with sorted values.
886fn encode_op_canonical(
887    writer: &mut Writer,
888    op: &Op<'_>,
889    dict_builder: &mut DictionaryBuilder,
890    property_types: &FxHashMap<Id, DataType>,
891) -> Result<(), EncodeError> {
892    match op {
893        Op::CreateEntity(ce) => {
894            // Sort values by (property_index, language_index) and check for duplicates
895            let sorted_values = sort_and_check_values(&ce.values, dict_builder)?;
896
897            writer.write_byte(1); // OP_CREATE_ENTITY
898            writer.write_id(&ce.id);
899            writer.write_varint(sorted_values.len() as u64);
900
901            for pv in &sorted_values {
902                let data_type = property_types.get(&pv.property)
903                    .copied()
904                    .unwrap_or_else(|| pv.value.data_type());
905                encode_property_value_canonical(writer, pv, dict_builder, data_type)?;
906            }
907            // Write context_ref: 0xFFFFFFFF = no context, else index into contexts[]
908            let context_ref = match &ce.context {
909                Some(ctx) => dict_builder.add_context(ctx) as u32,
910                None => 0xFFFFFFFF,
911            };
912            writer.write_varint(context_ref as u64);
913            Ok(())
914        }
915        Op::UpdateEntity(ue) => {
916            // Sort set_properties and unset_values, check for duplicates
917            let sorted_set = sort_and_check_values(&ue.set_properties, dict_builder)?;
918            let sorted_unset = sort_and_check_unsets(&ue.unset_values, dict_builder)?;
919
920            writer.write_byte(2); // OP_UPDATE_ENTITY
921            let id_index = dict_builder.add_object(ue.id);
922            writer.write_varint(id_index as u64);
923
924            let mut flags = 0u8;
925            if !sorted_set.is_empty() {
926                flags |= 0x01; // FLAG_HAS_SET_PROPERTIES
927            }
928            if !sorted_unset.is_empty() {
929                flags |= 0x02; // FLAG_HAS_UNSET_VALUES
930            }
931            writer.write_byte(flags);
932
933            if !sorted_set.is_empty() {
934                writer.write_varint(sorted_set.len() as u64);
935                for pv in &sorted_set {
936                    let data_type = property_types.get(&pv.property)
937                        .copied()
938                        .unwrap_or_else(|| pv.value.data_type());
939                    encode_property_value_canonical(writer, pv, dict_builder, data_type)?;
940                }
941            }
942
943            if !sorted_unset.is_empty() {
944                use crate::model::UnsetLanguage;
945                writer.write_varint(sorted_unset.len() as u64);
946                for unset in &sorted_unset {
947                    let prop_idx = dict_builder.add_property(unset.property, DataType::Bool);
948                    writer.write_varint(prop_idx as u64);
949                    let lang_value: u32 = match &unset.language {
950                        UnsetLanguage::All => 0xFFFFFFFF,
951                        UnsetLanguage::English => 0,
952                        UnsetLanguage::Specific(lang_id) => {
953                            dict_builder.add_language(Some(*lang_id)) as u32
954                        }
955                    };
956                    writer.write_varint(lang_value as u64);
957                }
958            }
959            // Write context_ref: 0xFFFFFFFF = no context, else index into contexts[]
960            let context_ref = match &ue.context {
961                Some(ctx) => dict_builder.add_context(ctx) as u32,
962                None => 0xFFFFFFFF,
963            };
964            writer.write_varint(context_ref as u64);
965            Ok(())
966        }
967        // Other ops don't have values to sort, delegate to regular encode
968        _ => encode_op(writer, op, dict_builder, property_types),
969    }
970}
971
972/// Sorts values by (property_index, language_index) and checks for duplicates.
973fn sort_and_check_values<'a>(
974    values: &[crate::model::PropertyValue<'a>],
975    dict_builder: &DictionaryBuilder,
976) -> Result<Vec<crate::model::PropertyValue<'a>>, EncodeError> {
977    use crate::model::{PropertyValue, Value};
978
979    if values.is_empty() {
980        return Ok(Vec::new());
981    }
982
983    // Create (property_index, language_index, original_index) tuples for sorting
984    let mut indexed: Vec<(usize, usize, usize, &PropertyValue<'a>)> = values
985        .iter()
986        .enumerate()
987        .map(|(i, pv)| {
988            let prop_idx = dict_builder.get_property_index(&pv.property).unwrap_or(0);
989            let lang_idx = match &pv.value {
990                Value::Text { language, .. } => dict_builder.get_language_index(language.as_ref()).unwrap_or(0),
991                _ => 0,
992            };
993            (prop_idx, lang_idx, i, pv)
994        })
995        .collect();
996
997    // Sort by (property_index, language_index)
998    indexed.sort_by(|a, b| (a.0, a.1).cmp(&(b.0, b.1)));
999
1000    // Check for duplicates (adjacent entries with same property_index and language_index)
1001    for i in 1..indexed.len() {
1002        if indexed[i].0 == indexed[i - 1].0 && indexed[i].1 == indexed[i - 1].1 {
1003            let pv = indexed[i].3;
1004            let language = match &pv.value {
1005                Value::Text { language, .. } => *language,
1006                _ => None,
1007            };
1008            return Err(EncodeError::DuplicateValue {
1009                property: pv.property,
1010                language,
1011            });
1012        }
1013    }
1014
1015    // Return cloned values in sorted order
1016    Ok(indexed.into_iter().map(|(_, _, _, pv)| pv.clone()).collect())
1017}
1018
1019/// Sorts unset values by (property_index, language) and checks for duplicates.
1020fn sort_and_check_unsets(
1021    unsets: &[crate::model::UnsetValue],
1022    dict_builder: &DictionaryBuilder,
1023) -> Result<Vec<crate::model::UnsetValue>, EncodeError> {
1024    use crate::model::UnsetLanguage;
1025
1026    if unsets.is_empty() {
1027        return Ok(Vec::new());
1028    }
1029
1030    // Create (property_index, language_sort_key, original_index) tuples for sorting
1031    let mut indexed: Vec<(usize, u32, usize, &crate::model::UnsetValue)> = unsets
1032        .iter()
1033        .enumerate()
1034        .map(|(i, up)| {
1035            let prop_idx = dict_builder.get_property_index(&up.property).unwrap_or(0);
1036            let lang_key: u32 = match &up.language {
1037                UnsetLanguage::All => 0xFFFFFFFF,
1038                UnsetLanguage::English => 0,
1039                UnsetLanguage::Specific(lang_id) => {
1040                    dict_builder.get_language_index(Some(lang_id)).unwrap_or(0) as u32
1041                }
1042            };
1043            (prop_idx, lang_key, i, up)
1044        })
1045        .collect();
1046
1047    // Sort by (property_index, language_key)
1048    indexed.sort_by(|a, b| (a.0, a.1).cmp(&(b.0, b.1)));
1049
1050    // Check for duplicates
1051    for i in 1..indexed.len() {
1052        if indexed[i].0 == indexed[i - 1].0 && indexed[i].1 == indexed[i - 1].1 {
1053            let up = indexed[i].3;
1054            let language = match &up.language {
1055                UnsetLanguage::All => None,
1056                UnsetLanguage::English => None,
1057                UnsetLanguage::Specific(id) => Some(*id),
1058            };
1059            return Err(EncodeError::DuplicateUnset {
1060                property: up.property,
1061                language,
1062            });
1063        }
1064    }
1065
1066    Ok(indexed.into_iter().map(|(_, _, _, up)| up.clone()).collect())
1067}
1068
1069/// Encodes a property value in canonical mode (same as regular but separated for clarity).
1070fn encode_property_value_canonical(
1071    writer: &mut Writer,
1072    pv: &crate::model::PropertyValue<'_>,
1073    dict_builder: &mut DictionaryBuilder,
1074    data_type: DataType,
1075) -> Result<(), EncodeError> {
1076    let prop_index = dict_builder.add_property(pv.property, data_type);
1077    writer.write_varint(prop_index as u64);
1078    crate::codec::value::encode_value(writer, &pv.value, dict_builder)?;
1079    Ok(())
1080}
1081
1082/// Encodes an Edit with profiling output (two-pass for comparison).
1083pub fn encode_edit_profiled(edit: &Edit, profile: bool) -> Result<Vec<u8>, EncodeError> {
1084    if !profile {
1085        return encode_edit(edit);
1086    }
1087
1088    use std::time::Instant;
1089
1090    let t0 = Instant::now();
1091
1092    // Property types are determined from values themselves (per-edit typing)
1093    let property_types = rustc_hash::FxHashMap::default();
1094    let t1 = Instant::now();
1095
1096    // Create dictionary builder - contexts will be collected from ops
1097    let mut dict_builder = DictionaryBuilder::with_capacity(edit.ops.len());
1098
1099    // Single pass: encode ops while building dictionaries (including contexts)
1100    let mut ops_writer = Writer::with_capacity(edit.ops.len() * 50);
1101
1102    for op in &edit.ops {
1103        encode_op(&mut ops_writer, op, &mut dict_builder, &property_types)?;
1104    }
1105    let t2 = Instant::now();
1106
1107    // Assemble final output
1108    let ops_bytes = ops_writer.into_bytes();
1109    let mut writer = Writer::with_capacity(256 + ops_bytes.len());
1110
1111    writer.write_bytes(MAGIC_UNCOMPRESSED);
1112    writer.write_byte(FORMAT_VERSION);
1113    writer.write_id(&edit.id);
1114    writer.write_string(&edit.name);
1115    writer.write_id_vec(&edit.authors);
1116    writer.write_signed_varint(edit.created_at);
1117    dict_builder.write_dictionaries(&mut writer);
1118    dict_builder.write_contexts(&mut writer);
1119    writer.write_varint(edit.ops.len() as u64);
1120    writer.write_bytes(&ops_bytes);
1121    let t3 = Instant::now();
1122
1123    let result = writer.into_bytes();
1124
1125    let total = t3.duration_since(t0);
1126    eprintln!("=== Encode Profile (single-pass) ===");
1127    eprintln!("  setup: {:?} ({:.1}%)", t1.duration_since(t0), 100.0 * t1.duration_since(t0).as_secs_f64() / total.as_secs_f64());
1128    eprintln!("  encode_ops + build_dicts: {:?} ({:.1}%)", t2.duration_since(t1), 100.0 * t2.duration_since(t1).as_secs_f64() / total.as_secs_f64());
1129    eprintln!("  assemble output: {:?} ({:.1}%)", t3.duration_since(t2), 100.0 * t3.duration_since(t2).as_secs_f64() / total.as_secs_f64());
1130    eprintln!("  TOTAL: {:?}", total);
1131
1132    Ok(result)
1133}
1134
1135/// Encodes an Edit to binary format with zstd compression.
1136pub fn encode_edit_compressed(edit: &Edit, level: i32) -> Result<Vec<u8>, EncodeError> {
1137    encode_edit_compressed_with_options(edit, level, EncodeOptions::default())
1138}
1139
1140/// Encodes an Edit to binary format with zstd compression and options.
1141pub fn encode_edit_compressed_with_options(
1142    edit: &Edit,
1143    level: i32,
1144    options: EncodeOptions,
1145) -> Result<Vec<u8>, EncodeError> {
1146    let uncompressed = encode_edit_with_options(edit, options)?;
1147
1148    let compressed = zstd::encode_all(uncompressed.as_slice(), level)
1149        .map_err(|e| EncodeError::CompressionFailed(e.to_string()))?;
1150
1151    let mut writer = Writer::with_capacity(5 + 10 + compressed.len());
1152    writer.write_bytes(MAGIC_COMPRESSED);
1153    writer.write_varint(uncompressed.len() as u64);
1154    writer.write_bytes(&compressed);
1155
1156    Ok(writer.into_bytes())
1157}
1158
1159#[cfg(test)]
1160mod tests {
1161    use super::*;
1162    use crate::model::{
1163        CreateEntity, CreateRelation, CreateValueRef, DeleteEntity, DeleteRelation, PropertyValue,
1164        UpdateEntity, UpdateRelation, UnsetLanguage, UnsetRelationField, UnsetValue, Value,
1165    };
1166
1167    fn make_test_edit() -> Edit<'static> {
1168        Edit {
1169            id: [1u8; 16],
1170            name: Cow::Owned("Test Edit".to_string()),
1171            authors: vec![[2u8; 16]],
1172            created_at: 1234567890,
1173                        ops: vec![
1174                Op::CreateEntity(CreateEntity {
1175                    id: [3u8; 16],
1176                    values: vec![PropertyValue {
1177                        property: [10u8; 16],
1178                        value: Value::Text {
1179                            value: Cow::Owned("Hello".to_string()),
1180                            language: None,
1181                        },
1182                    }],
1183                    context: None,
1184                }),
1185            ],
1186        }
1187    }
1188
1189    #[test]
1190    fn test_edit_roundtrip() {
1191        let edit = make_test_edit();
1192
1193        let encoded = encode_edit(&edit).unwrap();
1194        let decoded = decode_edit(&encoded).unwrap();
1195
1196        assert_eq!(edit.id, decoded.id);
1197        assert_eq!(edit.name, decoded.name);
1198        assert_eq!(edit.authors, decoded.authors);
1199        assert_eq!(edit.created_at, decoded.created_at);
1200        assert_eq!(edit.ops.len(), decoded.ops.len());
1201    }
1202
1203    #[test]
1204    fn test_edit_compressed_roundtrip() {
1205        let edit = make_test_edit();
1206
1207        let encoded = encode_edit_compressed(&edit, 3).unwrap();
1208        let decoded = decode_edit(&encoded).unwrap();
1209
1210        assert_eq!(edit.id, decoded.id);
1211        assert_eq!(edit.name, decoded.name);
1212        assert_eq!(edit.authors, decoded.authors);
1213        assert_eq!(edit.created_at, decoded.created_at);
1214        assert_eq!(edit.ops.len(), decoded.ops.len());
1215    }
1216
1217    #[test]
1218    fn test_update_entity_set_unset_overlap_rejected() {
1219        let edit = Edit {
1220            id: [1u8; 16],
1221            name: Cow::Borrowed(""),
1222            authors: vec![],
1223            created_at: 0,
1224            ops: vec![Op::UpdateEntity(UpdateEntity {
1225                id: [2u8; 16],
1226                set_properties: vec![PropertyValue {
1227                    property: [3u8; 16],
1228                    value: Value::Text {
1229                        value: Cow::Owned("x".to_string()),
1230                        language: None,
1231                    },
1232                }],
1233                unset_values: vec![UnsetValue {
1234                    property: [3u8; 16],
1235                    language: UnsetLanguage::English,
1236                }],
1237                context: None,
1238            })],
1239        };
1240
1241        let err = encode_edit(&edit).unwrap_err();
1242        assert!(matches!(err, EncodeError::InvalidInput { .. }));
1243    }
1244
1245    #[test]
1246    fn test_unset_language_requires_text() {
1247        let edit = Edit {
1248            id: [1u8; 16],
1249            name: Cow::Borrowed(""),
1250            authors: vec![],
1251            created_at: 0,
1252            ops: vec![Op::UpdateEntity(UpdateEntity {
1253                id: [2u8; 16],
1254                set_properties: vec![PropertyValue {
1255                    property: [3u8; 16],
1256                    value: Value::Int64 { value: 1, unit: None },
1257                }],
1258                unset_values: vec![UnsetValue {
1259                    property: [3u8; 16],
1260                    language: UnsetLanguage::English,
1261                }],
1262                context: None,
1263            })],
1264        };
1265
1266        let err = encode_edit(&edit).unwrap_err();
1267        assert!(matches!(err, EncodeError::InvalidInput { .. }));
1268    }
1269
1270    #[test]
1271    fn test_update_relation_set_unset_overlap_rejected() {
1272        let edit = Edit {
1273            id: [1u8; 16],
1274            name: Cow::Borrowed(""),
1275            authors: vec![],
1276            created_at: 0,
1277            ops: vec![Op::UpdateRelation(UpdateRelation {
1278                id: [4u8; 16],
1279                from_space: Some([5u8; 16]),
1280                from_version: None,
1281                to_space: None,
1282                to_version: None,
1283                position: None,
1284                unset: vec![UnsetRelationField::FromSpace],
1285                context: None,
1286            })],
1287        };
1288
1289        let err = encode_edit(&edit).unwrap_err();
1290        assert!(matches!(err, EncodeError::InvalidInput { .. }));
1291    }
1292
1293    #[test]
1294    fn test_property_type_mismatch_rejected() {
1295        let edit = Edit {
1296            id: [1u8; 16],
1297            name: Cow::Borrowed(""),
1298            authors: vec![],
1299            created_at: 0,
1300            ops: vec![
1301                Op::CreateEntity(CreateEntity {
1302                    id: [2u8; 16],
1303                    values: vec![PropertyValue {
1304                        property: [3u8; 16],
1305                        value: Value::Text {
1306                            value: Cow::Owned("x".to_string()),
1307                            language: None,
1308                        },
1309                    }],
1310                    context: None,
1311                }),
1312                Op::UpdateEntity(UpdateEntity {
1313                    id: [2u8; 16],
1314                    set_properties: vec![PropertyValue {
1315                        property: [3u8; 16],
1316                        value: Value::Int64 { value: 1, unit: None },
1317                    }],
1318                    unset_values: vec![],
1319                    context: None,
1320                }),
1321            ],
1322        };
1323
1324        let err = encode_edit(&edit).unwrap_err();
1325        assert!(matches!(err, EncodeError::InvalidInput { .. }));
1326    }
1327
1328    #[test]
1329    fn test_delete_then_create_entity_rejected() {
1330        let edit = Edit {
1331            id: [1u8; 16],
1332            name: Cow::Borrowed(""),
1333            authors: vec![],
1334            created_at: 0,
1335            ops: vec![
1336                Op::DeleteEntity(DeleteEntity {
1337                    id: [2u8; 16],
1338                    context: None,
1339                }),
1340                Op::CreateEntity(CreateEntity {
1341                    id: [2u8; 16],
1342                    values: vec![],
1343                    context: None,
1344                }),
1345            ],
1346        };
1347
1348        let err = encode_edit(&edit).unwrap_err();
1349        assert!(matches!(err, EncodeError::InvalidInput { .. }));
1350    }
1351
1352    #[test]
1353    fn test_delete_then_create_relation_rejected() {
1354        let edit = Edit {
1355            id: [1u8; 16],
1356            name: Cow::Borrowed(""),
1357            authors: vec![],
1358            created_at: 0,
1359            ops: vec![
1360                Op::DeleteRelation(DeleteRelation {
1361                    id: [4u8; 16],
1362                    context: None,
1363                }),
1364                Op::CreateRelation(CreateRelation {
1365                    id: [4u8; 16],
1366                    relation_type: [5u8; 16],
1367                    from: [6u8; 16],
1368                    from_is_value_ref: false,
1369                    from_space: None,
1370                    from_version: None,
1371                    to: [7u8; 16],
1372                    to_is_value_ref: false,
1373                    to_space: None,
1374                    to_version: None,
1375                    entity: None,
1376                    position: None,
1377                    context: None,
1378                }),
1379            ],
1380        };
1381
1382        let err = encode_edit(&edit).unwrap_err();
1383        assert!(matches!(err, EncodeError::InvalidInput { .. }));
1384    }
1385
1386    #[test]
1387    fn test_create_relation_entity_equals_id_rejected() {
1388        let edit = Edit {
1389            id: [1u8; 16],
1390            name: Cow::Borrowed(""),
1391            authors: vec![],
1392            created_at: 0,
1393            ops: vec![Op::CreateRelation(CreateRelation {
1394                id: [4u8; 16],
1395                relation_type: [5u8; 16],
1396                from: [6u8; 16],
1397                from_is_value_ref: false,
1398                from_space: None,
1399                from_version: None,
1400                to: [7u8; 16],
1401                to_is_value_ref: false,
1402                to_space: None,
1403                to_version: None,
1404                entity: Some([4u8; 16]),
1405                position: None,
1406                context: None,
1407            })],
1408        };
1409
1410        let err = encode_edit(&edit).unwrap_err();
1411        assert!(matches!(err, EncodeError::InvalidInput { .. }));
1412    }
1413
1414    #[test]
1415    fn test_create_value_ref_language_requires_text() {
1416        let edit = Edit {
1417            id: [1u8; 16],
1418            name: Cow::Borrowed(""),
1419            authors: vec![],
1420            created_at: 0,
1421            ops: vec![
1422                Op::CreateEntity(CreateEntity {
1423                    id: [2u8; 16],
1424                    values: vec![PropertyValue {
1425                        property: [3u8; 16],
1426                        value: Value::Int64 { value: 1, unit: None },
1427                    }],
1428                    context: None,
1429                }),
1430                Op::CreateValueRef(CreateValueRef {
1431                    id: [8u8; 16],
1432                    entity: [2u8; 16],
1433                    property: [3u8; 16],
1434                    language: Some([9u8; 16]),
1435                    space: None,
1436                }),
1437            ],
1438        };
1439
1440        let err = encode_edit(&edit).unwrap_err();
1441        assert!(matches!(err, EncodeError::InvalidInput { .. }));
1442    }
1443
1444    #[test]
1445    fn test_value_ref_endpoints_not_in_object_ids() {
1446        let edit = Edit {
1447            id: [1u8; 16],
1448            name: Cow::Borrowed(""),
1449            authors: vec![],
1450            created_at: 0,
1451            ops: vec![
1452                Op::CreateValueRef(CreateValueRef {
1453                    id: [10u8; 16],
1454                    entity: [2u8; 16],
1455                    property: [3u8; 16],
1456                    language: None,
1457                    space: None,
1458                }),
1459                Op::CreateRelation(CreateRelation {
1460                    id: [4u8; 16],
1461                    relation_type: [5u8; 16],
1462                    from: [10u8; 16],
1463                    from_is_value_ref: true,
1464                    from_space: None,
1465                    from_version: None,
1466                    to: [2u8; 16],
1467                    to_is_value_ref: false,
1468                    to_space: None,
1469                    to_version: None,
1470                    entity: None,
1471                    position: None,
1472                    context: None,
1473                }),
1474            ],
1475        };
1476
1477        let encoded = encode_edit(&edit).unwrap();
1478        let mut reader = Reader::new(&encoded);
1479        reader.read_bytes(4, "magic").unwrap();
1480        reader.read_byte("version").unwrap();
1481        reader.read_id("edit_id").unwrap();
1482        reader.read_string(MAX_STRING_LEN, "name").unwrap();
1483        reader.read_id_vec(MAX_AUTHORS, "authors").unwrap();
1484        reader.read_signed_varint("created_at").unwrap();
1485        let property_count = reader.read_varint("property_count").unwrap() as usize;
1486        for _ in 0..property_count {
1487            reader.read_id("property_id").unwrap();
1488            reader.read_byte("data_type").unwrap();
1489        }
1490        let _relation_types = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "relation_types").unwrap();
1491        let _languages = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "languages").unwrap();
1492        let _units = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "units").unwrap();
1493        let objects = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "objects").unwrap();
1494        let _context_ids = read_id_vec_no_duplicates(&mut reader, MAX_DICT_SIZE, "context_ids").unwrap();
1495
1496        assert!(!objects.contains(&[10u8; 16]));
1497        assert!(objects.contains(&[2u8; 16]));
1498    }
1499
1500    #[test]
1501    fn test_canonical_rejects_duplicate_unset() {
1502        let edit = Edit {
1503            id: [1u8; 16],
1504            name: Cow::Borrowed(""),
1505            authors: vec![],
1506            created_at: 0,
1507            ops: vec![Op::UpdateEntity(UpdateEntity {
1508                id: [2u8; 16],
1509                set_properties: vec![],
1510                unset_values: vec![
1511                    UnsetValue {
1512                        property: [3u8; 16],
1513                        language: UnsetLanguage::English,
1514                    },
1515                    UnsetValue {
1516                        property: [3u8; 16],
1517                        language: UnsetLanguage::English,
1518                    },
1519                ],
1520                context: None,
1521            })],
1522        };
1523
1524        let err = encode_edit_with_options(&edit, EncodeOptions::canonical()).unwrap_err();
1525        assert!(matches!(err, EncodeError::DuplicateUnset { .. }));
1526    }
1527
1528    #[test]
1529    fn test_canonical_rejects_duplicate_update_relation_unset_fields() {
1530        let edit = Edit {
1531            id: [1u8; 16],
1532            name: Cow::Borrowed(""),
1533            authors: vec![],
1534            created_at: 0,
1535            ops: vec![Op::UpdateRelation(UpdateRelation {
1536                id: [4u8; 16],
1537                from_space: None,
1538                from_version: None,
1539                to_space: None,
1540                to_version: None,
1541                position: None,
1542                unset: vec![UnsetRelationField::FromSpace, UnsetRelationField::FromSpace],
1543                context: None,
1544            })],
1545        };
1546
1547        let err = encode_edit_with_options(&edit, EncodeOptions::canonical()).unwrap_err();
1548        assert!(matches!(err, EncodeError::InvalidInput { .. }));
1549    }
1550
1551    #[test]
1552    fn test_compression_magic() {
1553        let edit = make_test_edit();
1554
1555        let uncompressed = encode_edit(&edit).unwrap();
1556        let compressed = encode_edit_compressed(&edit, 3).unwrap();
1557
1558        assert_eq!(&uncompressed[0..4], b"GRC2");
1559        assert_eq!(&compressed[0..5], b"GRC2Z");
1560    }
1561
1562    #[test]
1563    fn test_invalid_magic() {
1564        let data = b"XXXX";
1565        let result = decode_edit(data);
1566        assert!(matches!(result, Err(DecodeError::InvalidMagic { .. })));
1567    }
1568
1569    #[test]
1570    fn test_unsupported_version() {
1571        let mut data = Vec::new();
1572        data.extend_from_slice(MAGIC_UNCOMPRESSED);
1573        data.push(99); // Invalid version
1574        // Add enough bytes to not trigger EOF
1575        data.extend_from_slice(&[0u8; 100]);
1576
1577        let result = decode_edit(&data);
1578        assert!(matches!(result, Err(DecodeError::UnsupportedVersion { version: 99 })));
1579    }
1580
1581    #[test]
1582    fn test_empty_edit() {
1583        let edit: Edit<'static> = Edit {
1584            id: [0u8; 16],
1585            name: Cow::Borrowed(""),
1586            authors: vec![],
1587            created_at: 0,
1588                        ops: vec![],
1589        };
1590
1591        let encoded = encode_edit(&edit).unwrap();
1592        let decoded = decode_edit(&encoded).unwrap();
1593
1594        assert_eq!(edit.id, decoded.id);
1595        assert!(decoded.name.is_empty());
1596        assert!(decoded.authors.is_empty());
1597        assert!(decoded.ops.is_empty());
1598    }
1599
1600    #[test]
1601    fn test_canonical_encoding_deterministic() {
1602        // Two edits with values in different order should produce
1603        // identical bytes when using canonical encoding
1604
1605        let prop_a = [0x0A; 16]; // Comes first lexicographically
1606        let prop_b = [0x0B; 16]; // Comes second
1607
1608        // Edit 1: values in order A, B
1609        let edit1: Edit<'static> = Edit {
1610            id: [1u8; 16],
1611            name: Cow::Owned("Test".to_string()),
1612            authors: vec![],
1613            created_at: 0,
1614                        ops: vec![
1615                Op::CreateEntity(CreateEntity {
1616                    id: [3u8; 16],
1617                    values: vec![
1618                        PropertyValue {
1619                            property: prop_a,
1620                            value: Value::Text {
1621                                value: Cow::Owned("Hello".to_string()),
1622                                language: None,
1623                            },
1624                        },
1625                        PropertyValue {
1626                            property: prop_b,
1627                            value: Value::Int64 { value: 42, unit: None },
1628                        },
1629                    ],
1630                    context: None,
1631                }),
1632            ],
1633        };
1634
1635        // Edit 2: Same content but values in different order
1636        let edit2: Edit<'static> = Edit {
1637            id: [1u8; 16],
1638            name: Cow::Owned("Test".to_string()),
1639            authors: vec![],
1640            created_at: 0,
1641                        ops: vec![
1642                Op::CreateEntity(CreateEntity {
1643                    id: [3u8; 16],
1644                    values: vec![
1645                        // Note: prop_b first this time (different insertion order)
1646                        PropertyValue {
1647                            property: prop_b,
1648                            value: Value::Int64 { value: 42, unit: None },
1649                        },
1650                        PropertyValue {
1651                            property: prop_a,
1652                            value: Value::Text {
1653                                value: Cow::Owned("Hello".to_string()),
1654                                language: None,
1655                            },
1656                        },
1657                    ],
1658                    context: None,
1659                }),
1660            ],
1661        };
1662
1663        // Non-canonical encoding may produce different bytes
1664        let fast1 = encode_edit_with_options(&edit1, EncodeOptions::new()).unwrap();
1665        let fast2 = encode_edit_with_options(&edit2, EncodeOptions::new()).unwrap();
1666        // These might differ because dictionary order depends on insertion order
1667        // (We don't assert they're different because they might happen to be the same)
1668
1669        // Canonical encoding MUST produce identical bytes for same logical content
1670        let canonical1 = encode_edit_with_options(&edit1, EncodeOptions::canonical()).unwrap();
1671        let canonical2 = encode_edit_with_options(&edit2, EncodeOptions::canonical()).unwrap();
1672
1673        // Both should decode correctly
1674        let decoded1 = decode_edit(&canonical1).unwrap();
1675        let decoded2 = decode_edit(&canonical2).unwrap();
1676        assert_eq!(decoded1.id, edit1.id);
1677        assert_eq!(decoded2.id, edit2.id);
1678
1679        // And the encoded bytes should be identical (deterministic)
1680        // Note: The ops themselves may have different value orders, but the dictionary
1681        // portion should be identical since it's sorted by ID
1682        assert_eq!(
1683            &canonical1[..50], // Check header + dictionary start
1684            &canonical2[..50],
1685            "Canonical encoding should produce identical dictionary bytes"
1686        );
1687
1688        // Verify the edit still roundtrips
1689        let _ = fast1;
1690        let _ = fast2;
1691    }
1692
1693    #[test]
1694    fn test_canonical_encoding_roundtrip() {
1695        let edit = make_test_edit();
1696
1697        let encoded = encode_edit_with_options(&edit, EncodeOptions::canonical()).unwrap();
1698        let decoded = decode_edit(&encoded).unwrap();
1699
1700        assert_eq!(edit.id, decoded.id);
1701        assert_eq!(edit.name, decoded.name);
1702        assert_eq!(edit.authors, decoded.authors);
1703        assert_eq!(edit.created_at, decoded.created_at);
1704        assert_eq!(edit.ops.len(), decoded.ops.len());
1705    }
1706
1707    #[test]
1708    fn test_canonical_encoding_compressed() {
1709        let edit = make_test_edit();
1710
1711        let encoded = encode_edit_compressed_with_options(&edit, 3, EncodeOptions::canonical()).unwrap();
1712        let decoded = decode_edit(&encoded).unwrap();
1713
1714        assert_eq!(edit.id, decoded.id);
1715        assert_eq!(edit.name, decoded.name);
1716    }
1717
1718    #[test]
1719    fn test_canonical_rejects_duplicate_authors() {
1720        let author1 = [1u8; 16];
1721
1722        let edit: Edit<'static> = Edit {
1723            id: [0u8; 16],
1724            name: Cow::Owned("Test".to_string()),
1725            authors: vec![author1, author1], // Duplicate!
1726            created_at: 0,
1727                        ops: vec![],
1728        };
1729
1730        // Fast mode doesn't check duplicates
1731        let result = encode_edit_with_options(&edit, EncodeOptions::new());
1732        assert!(result.is_ok());
1733
1734        // Canonical mode rejects duplicates
1735        let result = encode_edit_with_options(&edit, EncodeOptions::canonical());
1736        assert!(matches!(result, Err(EncodeError::DuplicateAuthor { .. })));
1737    }
1738
1739    #[test]
1740    fn test_canonical_rejects_duplicate_values() {
1741        let prop = [10u8; 16];
1742
1743        let edit: Edit<'static> = Edit {
1744            id: [0u8; 16],
1745            name: Cow::Owned("Test".to_string()),
1746            authors: vec![],
1747            created_at: 0,
1748                        ops: vec![
1749                Op::CreateEntity(CreateEntity {
1750                    id: [1u8; 16],
1751                    values: vec![
1752                        PropertyValue {
1753                            property: prop,
1754                            value: Value::Text {
1755                                value: Cow::Owned("First".to_string()),
1756                                language: None,
1757                            },
1758                        },
1759                        PropertyValue {
1760                            property: prop,
1761                            value: Value::Text {
1762                                value: Cow::Owned("Second".to_string()),
1763                                language: None,
1764                            },
1765                        },
1766                    ],
1767                    context: None,
1768                }),
1769            ],
1770        };
1771
1772        // Canonical mode rejects duplicate (property, language) pairs
1773        let result = encode_edit_with_options(&edit, EncodeOptions::canonical());
1774        assert!(matches!(result, Err(EncodeError::DuplicateValue { .. })));
1775    }
1776
1777    #[test]
1778    fn test_canonical_allows_different_languages() {
1779        let prop = [10u8; 16];
1780        let lang_en = [20u8; 16];
1781        let lang_es = [21u8; 16];
1782
1783        let edit: Edit<'static> = Edit {
1784            id: [0u8; 16],
1785            name: Cow::Owned("Test".to_string()),
1786            authors: vec![],
1787            created_at: 0,
1788                        ops: vec![
1789                Op::CreateEntity(CreateEntity {
1790                    id: [1u8; 16],
1791                    values: vec![
1792                        PropertyValue {
1793                            property: prop,
1794                            value: Value::Text {
1795                                value: Cow::Owned("Hello".to_string()),
1796                                language: Some(lang_en),
1797                            },
1798                        },
1799                        PropertyValue {
1800                            property: prop,
1801                            value: Value::Text {
1802                                value: Cow::Owned("Hola".to_string()),
1803                                language: Some(lang_es),
1804                            },
1805                        },
1806                    ],
1807                    context: None,
1808                }),
1809            ],
1810        };
1811
1812        // Different languages for same property is allowed
1813        let result = encode_edit_with_options(&edit, EncodeOptions::canonical());
1814        assert!(result.is_ok());
1815    }
1816
1817    #[test]
1818    fn test_canonical_sorts_values_deterministically() {
1819        let prop_a = [0x0A; 16];
1820        let prop_b = [0x0B; 16];
1821
1822        // Values in reverse order (B before A)
1823        let edit: Edit<'static> = Edit {
1824            id: [1u8; 16],
1825            name: Cow::Owned("Test".to_string()),
1826            authors: vec![],
1827            created_at: 0,
1828                        ops: vec![
1829                Op::CreateEntity(CreateEntity {
1830                    id: [3u8; 16],
1831                    values: vec![
1832                        PropertyValue {
1833                            property: prop_b, // B first
1834                            value: Value::Int64 { value: 42, unit: None },
1835                        },
1836                        PropertyValue {
1837                            property: prop_a, // A second
1838                            value: Value::Text {
1839                                value: Cow::Owned("Hello".to_string()),
1840                                language: None,
1841                            },
1842                        },
1843                    ],
1844                    context: None,
1845                }),
1846            ],
1847        };
1848
1849        // Encode twice - should produce identical bytes
1850        let encoded1 = encode_edit_with_options(&edit, EncodeOptions::canonical()).unwrap();
1851        let encoded2 = encode_edit_with_options(&edit, EncodeOptions::canonical()).unwrap();
1852        assert_eq!(encoded1, encoded2, "Canonical encoding should be deterministic");
1853
1854        // Should roundtrip
1855        let decoded = decode_edit(&encoded1).unwrap();
1856        assert_eq!(decoded.ops.len(), 1);
1857    }
1858}