grc-20 0.4.0

GRC-20 v2 binary property graph format for decentralized knowledge networks
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
//! Edit structure for batched operations.
//!
//! Edits are standalone patches containing a batch of ops with metadata.

use std::borrow::Cow;

use rustc_hash::FxHashMap;

use crate::codec::primitives::Writer;
use crate::error::EncodeError;
use crate::limits::MAX_DICT_SIZE;
use crate::model::{DataType, Id, Op};

/// An edge in a context path (spec Section 4.5).
///
/// Represents a step in the path from the root entity to the changed entity.
/// A [`Context`] holds a sequence of these in its `edges` field.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ContextEdge {
    /// The relation type ID for this edge (e.g., BLOCKS_ID).
    ///
    /// [`DictionaryBuilder::add_context`] registers this ID in the
    /// relation-types dictionary, not in the context-IDs dictionary.
    pub type_id: Id,
    /// The target entity ID at this edge.
    ///
    /// [`DictionaryBuilder::add_context`] registers this ID in the
    /// context-IDs dictionary.
    pub to_entity_id: Id,
}

/// Context metadata for grouping changes (spec Section 4.5).
///
/// Provides the path from a root entity to the changed entity,
/// enabling context-aware change grouping (e.g., grouping block changes
/// under their parent entity).
///
/// The derived `Eq` and `Hash` compare the root ID and the full edge list;
/// [`DictionaryBuilder`] relies on them to deduplicate contexts.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Context {
    /// The root entity for this context.
    pub root_id: Id,
    /// Path from root to the changed entity.
    ///
    /// May be empty, in which case the context consists of the root only.
    pub edges: Vec<ContextEdge>,
}

/// A batch of operations with metadata (spec Section 4.1).
///
/// Edits are standalone patches. They contain no parent references;
/// ordering is provided by on-chain governance.
///
/// The lifetime `'a` lets the name and the ops borrow from an external
/// buffer instead of owning their data.
#[derive(Debug, Clone, PartialEq)]
pub struct Edit<'a> {
    /// The edit's unique identifier.
    pub id: Id,
    /// Optional human-readable name.
    ///
    /// There is no separate "absent" state: [`Edit::new`] stores the empty string.
    pub name: Cow<'a, str>,
    /// Author entity IDs.
    pub authors: Vec<Id>,
    /// Creation timestamp (metadata only, not used for conflict resolution).
    ///
    /// The constructors in this file initialize it to `0`.
    pub created_at: i64,
    /// Operations in this edit.
    pub ops: Vec<Op<'a>>,
}

impl<'a> Edit<'a> {
    /// Builds an edit that carries only an ID.
    ///
    /// The name is the empty string, there are no authors or ops, and
    /// `created_at` is `0`.
    pub fn new(id: Id) -> Self {
        // A `&'static str` converts into `Cow::Borrowed`, so no allocation happens here.
        Self::with_name(id, "")
    }

    /// Builds an edit that carries an ID and a name.
    ///
    /// `name` may be anything convertible into `Cow<'a, str>` (for example
    /// a `&'a str` or a `String`). There are no authors or ops, and
    /// `created_at` is `0`.
    pub fn with_name(id: Id, name: impl Into<Cow<'a, str>>) -> Self {
        let name = name.into();
        let authors = Vec::new();
        let ops = Vec::new();
        Self {
            id,
            name,
            authors,
            created_at: 0,
            ops,
        }
    }
}

/// Wire-format dictionaries for encoding/decoding.
///
/// These dictionaries map between full IDs and compact indices
/// within an edit.
///
/// `properties`, `relation_types`, `objects`, `context_ids` and `contexts`
/// are looked up with zero-based indices. `languages` and `units` are looked
/// up with one-based indices, because index 0 is reserved for "none"
/// (see [`WireDictionaries::get_language`] and [`WireDictionaries::get_unit`]).
#[derive(Debug, Clone, Default)]
pub struct WireDictionaries {
    /// Properties dictionary: (ID, DataType) pairs.
    pub properties: Vec<(Id, DataType)>,
    /// Relation type IDs.
    pub relation_types: Vec<Id>,
    /// Language entity IDs for localized TEXT values.
    ///
    /// Wire index `n >= 1` refers to `languages[n - 1]`.
    pub languages: Vec<Id>,
    /// Unit entity IDs for numerical values.
    ///
    /// Wire index `n >= 1` refers to `units[n - 1]`.
    pub units: Vec<Id>,
    /// Object IDs (entities and relations).
    pub objects: Vec<Id>,
    /// Context IDs (root_ids and edge to_entity_ids) - used during encoding/decoding.
    pub context_ids: Vec<Id>,
    /// Decoded contexts array - used by op decoders to resolve context_ref to Context.
    pub contexts: Vec<Context>,
}

impl WireDictionaries {
    /// Returns a set of dictionaries in which every table is empty.
    pub fn new() -> Self {
        Default::default()
    }

    /// Returns the `(ID, DataType)` pair at `index` in the properties
    /// dictionary, or `None` when `index` is out of range.
    pub fn get_property(&self, index: usize) -> Option<&(Id, DataType)> {
        self.properties.get(index)
    }

    /// Returns the relation type ID at `index`, or `None` when `index` is
    /// out of range.
    pub fn get_relation_type(&self, index: usize) -> Option<&Id> {
        self.relation_types.get(index)
    }

    /// Resolves a one-based language index.
    ///
    /// Index 0 stands for the default (no language) and yields `None`.
    /// Index `n >= 1` yields `languages[n - 1]`, or `None` when that slot
    /// does not exist.
    pub fn get_language(&self, index: usize) -> Option<&Id> {
        // `checked_sub` is `None` exactly for index 0, the "no language" marker.
        let slot = index.checked_sub(1)?;
        self.languages.get(slot)
    }

    /// Resolves a one-based unit index.
    ///
    /// Index 0 stands for "no unit" and yields `None`.
    /// Index `n >= 1` yields `units[n - 1]`, or `None` when that slot does
    /// not exist.
    pub fn get_unit(&self, index: usize) -> Option<&Id> {
        // `checked_sub` is `None` exactly for index 0, the "no unit" marker.
        let slot = index.checked_sub(1)?;
        self.units.get(slot)
    }

    /// Returns the object ID at `index`, or `None` when `index` is out of range.
    pub fn get_object(&self, index: usize) -> Option<&Id> {
        self.objects.get(index)
    }

    /// Returns the context ID at `index`, or `None` when `index` is out of range.
    pub fn get_context_id(&self, index: usize) -> Option<&Id> {
        self.context_ids.get(index)
    }

    /// Returns the decoded context at `index`, or `None` when `index` is
    /// out of range.
    pub fn get_context(&self, index: usize) -> Option<&Context> {
        self.contexts.get(index)
    }
}

/// Builder for constructing wire dictionaries during encoding.
///
/// Uses FxHashMap for faster hashing of 16-byte IDs.
///
/// Each dictionary is stored as a pair: a `Vec` holding the entries in index
/// order and a map from entry to its zero-based position in that `Vec`.
/// The `add_*` methods keep the two in sync and never store an entry twice.
#[derive(Debug, Clone, Default)]
pub struct DictionaryBuilder {
    // Property entries in index order, plus the property ID -> position map.
    // The map is keyed by ID only; the data type is not part of the key.
    properties: Vec<(Id, DataType)>,
    property_indices: FxHashMap<Id, usize>,
    // Relation type IDs in index order, plus the ID -> position map.
    relation_types: Vec<Id>,
    relation_type_indices: FxHashMap<Id, usize>,
    // Language IDs in index order, plus the ID -> zero-based position map.
    // Callers see these positions shifted by one (0 means "no language").
    languages: Vec<Id>,
    language_indices: FxHashMap<Id, usize>,
    // Unit IDs in index order, plus the ID -> zero-based position map.
    // Callers see these positions shifted by one (0 means "no unit").
    units: Vec<Id>,
    unit_indices: FxHashMap<Id, usize>,
    // Object IDs in index order, plus the ID -> position map.
    objects: Vec<Id>,
    object_indices: FxHashMap<Id, usize>,
    // Context IDs (roots and edge targets) in index order, plus the ID -> position map.
    context_ids: Vec<Id>,
    context_id_indices: FxHashMap<Id, usize>,
    // Unique contexts in index order, plus the context -> position map.
    contexts: Vec<Context>,
    context_indices: FxHashMap<Context, usize>,
}

impl DictionaryBuilder {
    /// Creates a new empty builder.
    ///
    /// Equivalent to `DictionaryBuilder::default()`: every dictionary and
    /// every index map starts out empty.
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates a new builder with pre-allocated capacity.
    ///
    /// `estimated_ops` is used to estimate dictionary sizes:
    /// - properties: ~estimated_ops / 4 (entities average ~4 properties)
    /// - relation_types: ~estimated_ops / 20 (fewer unique relation types)
    /// - languages: 4 (typically few languages per edit)
    /// - units: 4 (typically few units per edit)
    /// - objects: ~estimated_ops / 2 (many ops reference existing objects)
    /// - context_ids: 8 (typically few context IDs per edit)
    /// - contexts: 4 (typically few unique contexts per edit)
    pub fn with_capacity(estimated_ops: usize) -> Self {
        let prop_cap = estimated_ops / 4 + 1;
        let rel_cap = estimated_ops / 20 + 1;
        let lang_cap = 4;
        let unit_cap = 4;
        let obj_cap = estimated_ops / 2 + 1;
        let ctx_id_cap = 8;
        let ctx_cap = 4;

        Self {
            properties: Vec::with_capacity(prop_cap),
            property_indices: FxHashMap::with_capacity_and_hasher(prop_cap, Default::default()),
            relation_types: Vec::with_capacity(rel_cap),
            relation_type_indices: FxHashMap::with_capacity_and_hasher(rel_cap, Default::default()),
            languages: Vec::with_capacity(lang_cap),
            language_indices: FxHashMap::with_capacity_and_hasher(lang_cap, Default::default()),
            units: Vec::with_capacity(unit_cap),
            unit_indices: FxHashMap::with_capacity_and_hasher(unit_cap, Default::default()),
            objects: Vec::with_capacity(obj_cap),
            object_indices: FxHashMap::with_capacity_and_hasher(obj_cap, Default::default()),
            context_ids: Vec::with_capacity(ctx_id_cap),
            context_id_indices: FxHashMap::with_capacity_and_hasher(ctx_id_cap, Default::default()),
            contexts: Vec::with_capacity(ctx_cap),
            context_indices: FxHashMap::with_capacity_and_hasher(ctx_cap, Default::default()),
        }
    }

    /// Registers a property and returns its zero-based dictionary index.
    ///
    /// Lookup is by `id` alone. If the ID is already registered, its existing
    /// index is returned and `data_type` is ignored (the data type stored by
    /// the first registration is kept).
    pub fn add_property(&mut self, id: Id, data_type: DataType) -> usize {
        use std::collections::hash_map::Entry;

        match self.property_indices.entry(id) {
            Entry::Occupied(slot) => *slot.get(),
            Entry::Vacant(slot) => {
                // New entries are appended, so the next index is the current length.
                let next = self.properties.len();
                self.properties.push((id, data_type));
                slot.insert(next);
                next
            }
        }
    }

    /// Registers a relation type and returns its zero-based dictionary index.
    ///
    /// If the ID is already registered, its existing index is returned and
    /// nothing is added.
    pub fn add_relation_type(&mut self, id: Id) -> usize {
        use std::collections::hash_map::Entry;

        match self.relation_type_indices.entry(id) {
            Entry::Occupied(slot) => *slot.get(),
            Entry::Vacant(slot) => {
                // New entries are appended, so the next index is the current length.
                let next = self.relation_types.len();
                self.relation_types.push(id);
                slot.insert(next);
                next
            }
        }
    }

    /// Registers a language and returns its one-based wire index.
    ///
    /// `None` (the default language) always maps to 0 and adds nothing.
    /// `Some(id)` maps to the language's position in the dictionary plus one,
    /// adding the ID first if it was not registered yet.
    pub fn add_language(&mut self, id: Option<Id>) -> usize {
        use std::collections::hash_map::Entry;

        let lang_id = match id {
            Some(lang_id) => lang_id,
            None => return 0,
        };

        let position = match self.language_indices.entry(lang_id) {
            Entry::Occupied(slot) => *slot.get(),
            Entry::Vacant(slot) => {
                let next = self.languages.len();
                self.languages.push(lang_id);
                slot.insert(next);
                next
            }
        };
        // The map stores zero-based positions; 0 is reserved for "no language".
        position + 1
    }

    /// Registers a unit and returns its one-based wire index.
    ///
    /// `None` (no unit) always maps to 0 and adds nothing.
    /// `Some(id)` maps to the unit's position in the dictionary plus one,
    /// adding the ID first if it was not registered yet.
    pub fn add_unit(&mut self, id: Option<Id>) -> usize {
        use std::collections::hash_map::Entry;

        let unit_id = match id {
            Some(unit_id) => unit_id,
            None => return 0,
        };

        let position = match self.unit_indices.entry(unit_id) {
            Entry::Occupied(slot) => *slot.get(),
            Entry::Vacant(slot) => {
                let next = self.units.len();
                self.units.push(unit_id);
                slot.insert(next);
                next
            }
        };
        // The map stores zero-based positions; 0 is reserved for "no unit".
        position + 1
    }

    /// Registers an object ID and returns its zero-based dictionary index.
    ///
    /// If the ID is already registered, its existing index is returned and
    /// nothing is added.
    pub fn add_object(&mut self, id: Id) -> usize {
        use std::collections::hash_map::Entry;

        match self.object_indices.entry(id) {
            Entry::Occupied(slot) => *slot.get(),
            Entry::Vacant(slot) => {
                // New entries are appended, so the next index is the current length.
                let next = self.objects.len();
                self.objects.push(id);
                slot.insert(next);
                next
            }
        }
    }

    /// Registers a context ID and returns its zero-based dictionary index.
    ///
    /// If the ID is already registered, its existing index is returned and
    /// nothing is added.
    pub fn add_context_id(&mut self, id: Id) -> usize {
        use std::collections::hash_map::Entry;

        match self.context_id_indices.entry(id) {
            Entry::Occupied(slot) => *slot.get(),
            Entry::Vacant(slot) => {
                // New entries are appended, so the next index is the current length.
                let next = self.context_ids.len();
                self.context_ids.push(id);
                slot.insert(next);
                next
            }
        }
    }

    /// Adds or gets the index for a context.
    ///
    /// If the context is new, registers all its IDs to the appropriate dictionaries:
    /// - root_id and edge.to_entity_id go to context_ids dictionary
    /// - edge.type_id goes to relation_types dictionary (it's a RelationTypeRef)
    /// Returns the index into the contexts array.
    pub fn add_context(&mut self, context: &Context) -> usize {
        if let Some(&idx) = self.context_indices.get(context) {
            idx
        } else {
            // Register all IDs in the context to appropriate dictionaries
            self.add_context_id(context.root_id);
            for edge in &context.edges {
                // type_id is a relation type, not a context ID
                self.add_relation_type(edge.type_id);
                self.add_context_id(edge.to_entity_id);
            }

            // Add context to contexts array
            let idx = self.contexts.len();
            self.contexts.push(context.clone());
            self.context_indices.insert(context.clone(), idx);
            idx
        }
    }

    /// Gets the index for an existing context (for encoding).
    ///
    /// Returns `None` if no equal context (same root ID and same edge list)
    /// has been registered. Never modifies the builder.
    pub fn get_context_index(&self, context: &Context) -> Option<usize> {
        self.context_indices.get(context).copied()
    }

    /// Builds the final wire dictionaries (consumes the builder).
    pub fn build(self) -> WireDictionaries {
        WireDictionaries {
            properties: self.properties,
            relation_types: self.relation_types,
            languages: self.languages,
            units: self.units,
            objects: self.objects,
            context_ids: self.context_ids,
            contexts: self.contexts,
        }
    }

    /// Returns a reference to wire dictionaries without consuming the builder.
    /// This allows continued use of the builder for encoding while having the dictionaries.
    pub fn as_wire_dicts(&self) -> WireDictionaries {
        WireDictionaries {
            properties: self.properties.clone(),
            relation_types: self.relation_types.clone(),
            languages: self.languages.clone(),
            units: self.units.clone(),
            objects: self.objects.clone(),
            context_ids: self.context_ids.clone(),
            contexts: self.contexts.clone(),
        }
    }

    /// Gets the index for an existing property (for encoding).
    ///
    /// Returns the zero-based dictionary index, or `None` if the property ID
    /// has not been registered. Never modifies the builder.
    pub fn get_property_index(&self, id: &Id) -> Option<usize> {
        self.property_indices.get(id).copied()
    }

    /// Gets the index for an existing relation type (for encoding).
    ///
    /// Returns the zero-based dictionary index, or `None` if the relation
    /// type ID has not been registered. Never modifies the builder.
    pub fn get_relation_type_index(&self, id: &Id) -> Option<usize> {
        self.relation_type_indices.get(id).copied()
    }

    /// Looks up the one-based wire index of an already registered language (for encoding).
    ///
    /// `None` (the default language) yields `Some(0)`. A registered language
    /// yields its dictionary position plus one. A language that has not been
    /// registered yields `None`.
    pub fn get_language_index(&self, id: Option<&Id>) -> Option<usize> {
        if let Some(lang_id) = id {
            // Unregistered languages propagate `None` to the caller.
            let position = *self.language_indices.get(lang_id)?;
            Some(position + 1)
        } else {
            Some(0)
        }
    }

    /// Gets the index for an existing object (for encoding).
    ///
    /// Returns the zero-based dictionary index, or `None` if the object ID
    /// has not been registered. Never modifies the builder.
    pub fn get_object_index(&self, id: &Id) -> Option<usize> {
        self.object_indices.get(id).copied()
    }

    /// Gets the index for an existing context ID (for encoding).
    ///
    /// Returns the zero-based dictionary index, or `None` if the context ID
    /// has not been registered. Never modifies the builder.
    pub fn get_context_id_index(&self, id: &Id) -> Option<usize> {
        self.context_id_indices.get(id).copied()
    }

    /// Serializes the ID dictionaries straight from the builder (no cloning).
    ///
    /// Output order:
    /// 1. properties: a varint count, then for each entry the ID followed by
    ///    the data type as a single byte
    /// 2. relation types, languages, units, objects, and context IDs, each
    ///    written with `Writer::write_id_vec`
    ///
    /// The contexts array is not part of this output; it is written by
    /// [`DictionaryBuilder::write_contexts`].
    pub fn write_dictionaries(&self, writer: &mut Writer) {
        // Properties carry a data type next to the ID, so they are written by hand.
        writer.write_varint(self.properties.len() as u64);
        self.properties.iter().for_each(|(id, data_type)| {
            writer.write_id(id);
            writer.write_byte(*data_type as u8);
        });

        // The remaining dictionaries are plain ID lists; the order here is the wire order.
        for ids in [
            &self.relation_types,
            &self.languages,
            &self.units,
            &self.objects,
            &self.context_ids,
        ] {
            writer.write_id_vec(ids);
        }
    }

    /// Writes the contexts array to the writer.
    ///
    /// Each context is encoded as:
    /// - root_id: varint (index into context_ids)
    /// - edge_count: varint
    /// - edges: for each edge: type_id (RelationTypeRef), to_entity_id (ContextRef)
    pub fn write_contexts(&self, writer: &mut Writer) {
        writer.write_varint(self.contexts.len() as u64);
        for ctx in &self.contexts {
            // Root ID as context_id index
            let root_idx = self.context_id_indices.get(&ctx.root_id)
                .copied()
                .expect("context root_id must be in context_ids dictionary");
            writer.write_varint(root_idx as u64);

            // Edges
            writer.write_varint(ctx.edges.len() as u64);
            for edge in &ctx.edges {
                // type_id is a RelationTypeRef (index into relation_types dictionary)
                let type_idx = self.relation_type_indices.get(&edge.type_id)
                    .copied()
                    .expect("context edge type_id must be in relation_types dictionary");
                // to_entity_id is a ContextRef (index into context_ids dictionary)
                let to_idx = self.context_id_indices.get(&edge.to_entity_id)
                    .copied()
                    .expect("context edge to_entity_id must be in context_ids dictionary");
                writer.write_varint(type_idx as u64);
                writer.write_varint(to_idx as u64);
            }
        }
    }

    /// Checks every dictionary and every context path against `MAX_DICT_SIZE`.
    ///
    /// # Errors
    ///
    /// Returns `EncodeError::LengthExceedsLimit` for the first length that
    /// exceeds the limit. Dictionaries are checked in the order properties,
    /// relation_types, languages, units, objects, context_ids, contexts; the
    /// edge count of each context (reported as `"context_edges"`) is checked last.
    pub fn validate_limits(&self) -> Result<(), EncodeError> {
        let max = MAX_DICT_SIZE;

        // (reported field name, observed length), in the order they are checked.
        let dictionary_sizes = [
            ("properties", self.properties.len()),
            ("relation_types", self.relation_types.len()),
            ("languages", self.languages.len()),
            ("units", self.units.len()),
            ("objects", self.objects.len()),
            ("context_ids", self.context_ids.len()),
            ("contexts", self.contexts.len()),
        ];
        let edge_counts = self
            .contexts
            .iter()
            .map(|ctx| ("context_edges", ctx.edges.len()));

        for (field, len) in dictionary_sizes.iter().copied().chain(edge_counts) {
            if len > max {
                return Err(EncodeError::LengthExceedsLimit { field, len, max });
            }
        }
        Ok(())
    }

    /// Converts this builder into a sorted canonical form.
    ///
    /// All dictionaries are sorted by ID bytes (lexicographic order),
    /// and the index maps are rebuilt to reflect the new ordering.
    ///
    /// This is used for canonical encoding to ensure deterministic output.
    pub fn into_sorted(self) -> Self {
        // Sort properties by ID
        let mut properties = self.properties;
        properties.sort_by(|a, b| a.0.cmp(&b.0));
        let property_indices: FxHashMap<Id, usize> = properties
            .iter()
            .enumerate()
            .map(|(i, (id, _))| (*id, i))
            .collect();

        // Sort relation types by ID
        let mut relation_types = self.relation_types;
        relation_types.sort();
        let relation_type_indices: FxHashMap<Id, usize> = relation_types
            .iter()
            .enumerate()
            .map(|(i, id)| (*id, i))
            .collect();

        // Sort languages by ID
        let mut languages = self.languages;
        languages.sort();
        let language_indices: FxHashMap<Id, usize> = languages
            .iter()
            .enumerate()
            .map(|(i, id)| (*id, i))
            .collect();

        // Sort units by ID
        let mut units = self.units;
        units.sort();
        let unit_indices: FxHashMap<Id, usize> = units
            .iter()
            .enumerate()
            .map(|(i, id)| (*id, i))
            .collect();

        // Sort objects by ID
        let mut objects = self.objects;
        objects.sort();
        let object_indices: FxHashMap<Id, usize> = objects
            .iter()
            .enumerate()
            .map(|(i, id)| (*id, i))
            .collect();

        // Sort context IDs by ID
        let mut context_ids = self.context_ids;
        context_ids.sort();
        let context_id_indices: FxHashMap<Id, usize> = context_ids
            .iter()
            .enumerate()
            .map(|(i, id)| (*id, i))
            .collect();

        // Sort contexts by root_id, then by edges (canonically)
        let mut contexts = self.contexts;
        contexts.sort_by(|a, b| {
            // First compare by root_id
            match a.root_id.cmp(&b.root_id) {
                std::cmp::Ordering::Equal => {
                    // Then compare edges lexicographically
                    let a_edges: Vec<_> = a.edges.iter().map(|e| (e.type_id, e.to_entity_id)).collect();
                    let b_edges: Vec<_> = b.edges.iter().map(|e| (e.type_id, e.to_entity_id)).collect();
                    a_edges.cmp(&b_edges)
                }
                other => other,
            }
        });
        let context_indices: FxHashMap<Context, usize> = contexts
            .iter()
            .enumerate()
            .map(|(i, ctx)| (ctx.clone(), i))
            .collect();

        Self {
            properties,
            property_indices,
            relation_types,
            relation_type_indices,
            languages,
            language_indices,
            units,
            unit_indices,
            objects,
            object_indices,
            context_ids,
            context_id_indices,
            contexts,
            context_indices,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// `Edit::new` keeps the given ID and leaves name, authors, and ops empty.
    #[test]
    fn test_edit_new() {
        let id = [1u8; 16];
        let Edit {
            id: stored_id,
            name,
            authors,
            ops,
            ..
        } = Edit::new(id);

        assert_eq!(stored_id, id);
        assert!(name.is_empty());
        assert!(authors.is_empty());
        assert!(ops.is_empty());
    }

    /// Properties get consecutive indices and repeated IDs reuse their index.
    #[test]
    fn test_dictionary_builder() {
        let (prop1, prop2) = ([1u8; 16], [2u8; 16]);
        let mut builder = DictionaryBuilder::new();

        // First add, repeated add of the same ID, then a different ID.
        let indices = [
            builder.add_property(prop1, DataType::Text),
            builder.add_property(prop1, DataType::Text),
            builder.add_property(prop2, DataType::Integer),
        ];
        assert_eq!(indices, [0, 0, 1]);

        let dicts = builder.build();
        assert_eq!(dicts.properties.len(), 2);
        assert_eq!(
            dicts.properties,
            vec![(prop1, DataType::Text), (prop2, DataType::Integer)]
        );
    }

    /// Language indices are one-based, with 0 reserved for "no language".
    #[test]
    fn test_language_indexing() {
        let (lang1, lang2) = ([10u8; 16], [20u8; 16]);
        let mut builder = DictionaryBuilder::new();

        // None, first language, same language again, second language.
        let indices = [
            builder.add_language(None),
            builder.add_language(Some(lang1)),
            builder.add_language(Some(lang1)),
            builder.add_language(Some(lang2)),
        ];
        assert_eq!(indices, [0, 1, 1, 2]);

        let dicts = builder.build();
        assert_eq!(dicts.languages.len(), 2);

        // Index 0 is the default language; 1 and 2 resolve in insertion order.
        assert!(dicts.get_language(0).is_none());
        assert_eq!(dicts.get_language(1), Some(&lang1));
        assert_eq!(dicts.get_language(2), Some(&lang2));
    }
}