1use std::borrow::Cow;
6
7use rustc_hash::FxHashMap;
8
9use crate::codec::primitives::Writer;
10use crate::error::EncodeError;
11use crate::limits::MAX_DICT_SIZE;
12use crate::model::{DataType, Id, Op};
13
14#[derive(Debug, Clone, PartialEq, Eq, Hash)]
18pub struct ContextEdge {
19 pub type_id: Id,
21 pub to_entity_id: Id,
23}
24
25#[derive(Debug, Clone, PartialEq, Eq, Hash)]
31pub struct Context {
32 pub root_id: Id,
34 pub edges: Vec<ContextEdge>,
36}
37
38#[derive(Debug, Clone, PartialEq)]
43pub struct Edit<'a> {
44 pub id: Id,
46 pub name: Cow<'a, str>,
48 pub authors: Vec<Id>,
50 pub created_at: i64,
52 pub ops: Vec<Op<'a>>,
54}
55
56impl<'a> Edit<'a> {
57 pub fn new(id: Id) -> Self {
59 Self {
60 id,
61 name: Cow::Borrowed(""),
62 authors: Vec::new(),
63 created_at: 0,
64 ops: Vec::new(),
65 }
66 }
67
68 pub fn with_name(id: Id, name: impl Into<Cow<'a, str>>) -> Self {
70 Self {
71 id,
72 name: name.into(),
73 authors: Vec::new(),
74 created_at: 0,
75 ops: Vec::new(),
76 }
77 }
78}
79
80#[derive(Debug, Clone, Default)]
85pub struct WireDictionaries {
86 pub properties: Vec<(Id, DataType)>,
88 pub relation_types: Vec<Id>,
90 pub languages: Vec<Id>,
92 pub units: Vec<Id>,
94 pub objects: Vec<Id>,
96 pub context_ids: Vec<Id>,
98 pub contexts: Vec<Context>,
100}
101
102impl WireDictionaries {
103 pub fn new() -> Self {
105 Self::default()
106 }
107
108 pub fn get_property(&self, index: usize) -> Option<&(Id, DataType)> {
110 self.properties.get(index)
111 }
112
113 pub fn get_relation_type(&self, index: usize) -> Option<&Id> {
115 self.relation_types.get(index)
116 }
117
118 pub fn get_language(&self, index: usize) -> Option<&Id> {
123 if index == 0 {
124 None
125 } else {
126 self.languages.get(index - 1)
127 }
128 }
129
130 pub fn get_unit(&self, index: usize) -> Option<&Id> {
135 if index == 0 {
136 None
137 } else {
138 self.units.get(index - 1)
139 }
140 }
141
142 pub fn get_object(&self, index: usize) -> Option<&Id> {
144 self.objects.get(index)
145 }
146
147 pub fn get_context_id(&self, index: usize) -> Option<&Id> {
149 self.context_ids.get(index)
150 }
151
152 pub fn get_context(&self, index: usize) -> Option<&Context> {
154 self.contexts.get(index)
155 }
156}
157
158#[derive(Debug, Clone, Default)]
162pub struct DictionaryBuilder {
163 properties: Vec<(Id, DataType)>,
164 property_indices: FxHashMap<Id, usize>,
165 relation_types: Vec<Id>,
166 relation_type_indices: FxHashMap<Id, usize>,
167 languages: Vec<Id>,
168 language_indices: FxHashMap<Id, usize>,
169 units: Vec<Id>,
170 unit_indices: FxHashMap<Id, usize>,
171 objects: Vec<Id>,
172 object_indices: FxHashMap<Id, usize>,
173 context_ids: Vec<Id>,
174 context_id_indices: FxHashMap<Id, usize>,
175 contexts: Vec<Context>,
176 context_indices: FxHashMap<Context, usize>,
177}
178
179impl DictionaryBuilder {
180 pub fn new() -> Self {
182 Self::default()
183 }
184
185 pub fn with_capacity(estimated_ops: usize) -> Self {
196 let prop_cap = estimated_ops / 4 + 1;
197 let rel_cap = estimated_ops / 20 + 1;
198 let lang_cap = 4;
199 let unit_cap = 4;
200 let obj_cap = estimated_ops / 2 + 1;
201 let ctx_id_cap = 8;
202 let ctx_cap = 4;
203
204 Self {
205 properties: Vec::with_capacity(prop_cap),
206 property_indices: FxHashMap::with_capacity_and_hasher(prop_cap, Default::default()),
207 relation_types: Vec::with_capacity(rel_cap),
208 relation_type_indices: FxHashMap::with_capacity_and_hasher(rel_cap, Default::default()),
209 languages: Vec::with_capacity(lang_cap),
210 language_indices: FxHashMap::with_capacity_and_hasher(lang_cap, Default::default()),
211 units: Vec::with_capacity(unit_cap),
212 unit_indices: FxHashMap::with_capacity_and_hasher(unit_cap, Default::default()),
213 objects: Vec::with_capacity(obj_cap),
214 object_indices: FxHashMap::with_capacity_and_hasher(obj_cap, Default::default()),
215 context_ids: Vec::with_capacity(ctx_id_cap),
216 context_id_indices: FxHashMap::with_capacity_and_hasher(ctx_id_cap, Default::default()),
217 contexts: Vec::with_capacity(ctx_cap),
218 context_indices: FxHashMap::with_capacity_and_hasher(ctx_cap, Default::default()),
219 }
220 }
221
222 pub fn add_property(&mut self, id: Id, data_type: DataType) -> usize {
224 if let Some(&idx) = self.property_indices.get(&id) {
225 idx
226 } else {
227 let idx = self.properties.len();
228 self.properties.push((id, data_type));
229 self.property_indices.insert(id, idx);
230 idx
231 }
232 }
233
234 pub fn add_relation_type(&mut self, id: Id) -> usize {
236 if let Some(&idx) = self.relation_type_indices.get(&id) {
237 idx
238 } else {
239 let idx = self.relation_types.len();
240 self.relation_types.push(id);
241 self.relation_type_indices.insert(id, idx);
242 idx
243 }
244 }
245
246 pub fn add_language(&mut self, id: Option<Id>) -> usize {
250 match id {
251 None => 0,
252 Some(lang_id) => {
253 if let Some(&idx) = self.language_indices.get(&lang_id) {
254 idx + 1
255 } else {
256 let idx = self.languages.len();
257 self.languages.push(lang_id);
258 self.language_indices.insert(lang_id, idx);
259 idx + 1
260 }
261 }
262 }
263 }
264
265 pub fn add_unit(&mut self, id: Option<Id>) -> usize {
269 match id {
270 None => 0,
271 Some(unit_id) => {
272 if let Some(&idx) = self.unit_indices.get(&unit_id) {
273 idx + 1
274 } else {
275 let idx = self.units.len();
276 self.units.push(unit_id);
277 self.unit_indices.insert(unit_id, idx);
278 idx + 1
279 }
280 }
281 }
282 }
283
284 pub fn add_object(&mut self, id: Id) -> usize {
286 if let Some(&idx) = self.object_indices.get(&id) {
287 idx
288 } else {
289 let idx = self.objects.len();
290 self.objects.push(id);
291 self.object_indices.insert(id, idx);
292 idx
293 }
294 }
295
296 pub fn add_context_id(&mut self, id: Id) -> usize {
298 if let Some(&idx) = self.context_id_indices.get(&id) {
299 idx
300 } else {
301 let idx = self.context_ids.len();
302 self.context_ids.push(id);
303 self.context_id_indices.insert(id, idx);
304 idx
305 }
306 }
307
308 pub fn add_context(&mut self, context: &Context) -> usize {
315 if let Some(&idx) = self.context_indices.get(context) {
316 idx
317 } else {
318 self.add_context_id(context.root_id);
320 for edge in &context.edges {
321 self.add_relation_type(edge.type_id);
323 self.add_context_id(edge.to_entity_id);
324 }
325
326 let idx = self.contexts.len();
328 self.contexts.push(context.clone());
329 self.context_indices.insert(context.clone(), idx);
330 idx
331 }
332 }
333
334 pub fn get_context_index(&self, context: &Context) -> Option<usize> {
336 self.context_indices.get(context).copied()
337 }
338
339 pub fn build(self) -> WireDictionaries {
341 WireDictionaries {
342 properties: self.properties,
343 relation_types: self.relation_types,
344 languages: self.languages,
345 units: self.units,
346 objects: self.objects,
347 context_ids: self.context_ids,
348 contexts: self.contexts,
349 }
350 }
351
352 pub fn as_wire_dicts(&self) -> WireDictionaries {
355 WireDictionaries {
356 properties: self.properties.clone(),
357 relation_types: self.relation_types.clone(),
358 languages: self.languages.clone(),
359 units: self.units.clone(),
360 objects: self.objects.clone(),
361 context_ids: self.context_ids.clone(),
362 contexts: self.contexts.clone(),
363 }
364 }
365
366 pub fn get_property_index(&self, id: &Id) -> Option<usize> {
368 self.property_indices.get(id).copied()
369 }
370
371 pub fn get_relation_type_index(&self, id: &Id) -> Option<usize> {
373 self.relation_type_indices.get(id).copied()
374 }
375
376 pub fn get_language_index(&self, id: Option<&Id>) -> Option<usize> {
379 match id {
380 None => Some(0),
381 Some(lang_id) => self.language_indices.get(lang_id).map(|idx| idx + 1),
382 }
383 }
384
385 pub fn get_object_index(&self, id: &Id) -> Option<usize> {
387 self.object_indices.get(id).copied()
388 }
389
390 pub fn get_context_id_index(&self, id: &Id) -> Option<usize> {
392 self.context_id_indices.get(id).copied()
393 }
394
395 pub fn write_dictionaries(&self, writer: &mut Writer) {
397 writer.write_varint(self.properties.len() as u64);
399 for (id, data_type) in &self.properties {
400 writer.write_id(id);
401 writer.write_byte(*data_type as u8);
402 }
403
404 writer.write_id_vec(&self.relation_types);
406
407 writer.write_id_vec(&self.languages);
409
410 writer.write_id_vec(&self.units);
412
413 writer.write_id_vec(&self.objects);
415
416 writer.write_id_vec(&self.context_ids);
418 }
419
420 pub fn write_contexts(&self, writer: &mut Writer) {
427 writer.write_varint(self.contexts.len() as u64);
428 for ctx in &self.contexts {
429 let root_idx = self.context_id_indices.get(&ctx.root_id)
431 .copied()
432 .expect("context root_id must be in context_ids dictionary");
433 writer.write_varint(root_idx as u64);
434
435 writer.write_varint(ctx.edges.len() as u64);
437 for edge in &ctx.edges {
438 let type_idx = self.relation_type_indices.get(&edge.type_id)
440 .copied()
441 .expect("context edge type_id must be in relation_types dictionary");
442 let to_idx = self.context_id_indices.get(&edge.to_entity_id)
444 .copied()
445 .expect("context edge to_entity_id must be in context_ids dictionary");
446 writer.write_varint(type_idx as u64);
447 writer.write_varint(to_idx as u64);
448 }
449 }
450 }
451
452 pub fn validate_limits(&self) -> Result<(), EncodeError> {
454 let max = MAX_DICT_SIZE;
455 if self.properties.len() > max {
456 return Err(EncodeError::LengthExceedsLimit {
457 field: "properties",
458 len: self.properties.len(),
459 max,
460 });
461 }
462 if self.relation_types.len() > max {
463 return Err(EncodeError::LengthExceedsLimit {
464 field: "relation_types",
465 len: self.relation_types.len(),
466 max,
467 });
468 }
469 if self.languages.len() > max {
470 return Err(EncodeError::LengthExceedsLimit {
471 field: "languages",
472 len: self.languages.len(),
473 max,
474 });
475 }
476 if self.units.len() > max {
477 return Err(EncodeError::LengthExceedsLimit {
478 field: "units",
479 len: self.units.len(),
480 max,
481 });
482 }
483 if self.objects.len() > max {
484 return Err(EncodeError::LengthExceedsLimit {
485 field: "objects",
486 len: self.objects.len(),
487 max,
488 });
489 }
490 if self.context_ids.len() > max {
491 return Err(EncodeError::LengthExceedsLimit {
492 field: "context_ids",
493 len: self.context_ids.len(),
494 max,
495 });
496 }
497 if self.contexts.len() > max {
498 return Err(EncodeError::LengthExceedsLimit {
499 field: "contexts",
500 len: self.contexts.len(),
501 max,
502 });
503 }
504 for ctx in &self.contexts {
505 if ctx.edges.len() > max {
506 return Err(EncodeError::LengthExceedsLimit {
507 field: "context_edges",
508 len: ctx.edges.len(),
509 max,
510 });
511 }
512 }
513 Ok(())
514 }
515
516 pub fn into_sorted(self) -> Self {
523 let mut properties = self.properties;
525 properties.sort_by(|a, b| a.0.cmp(&b.0));
526 let property_indices: FxHashMap<Id, usize> = properties
527 .iter()
528 .enumerate()
529 .map(|(i, (id, _))| (*id, i))
530 .collect();
531
532 let mut relation_types = self.relation_types;
534 relation_types.sort();
535 let relation_type_indices: FxHashMap<Id, usize> = relation_types
536 .iter()
537 .enumerate()
538 .map(|(i, id)| (*id, i))
539 .collect();
540
541 let mut languages = self.languages;
543 languages.sort();
544 let language_indices: FxHashMap<Id, usize> = languages
545 .iter()
546 .enumerate()
547 .map(|(i, id)| (*id, i))
548 .collect();
549
550 let mut units = self.units;
552 units.sort();
553 let unit_indices: FxHashMap<Id, usize> = units
554 .iter()
555 .enumerate()
556 .map(|(i, id)| (*id, i))
557 .collect();
558
559 let mut objects = self.objects;
561 objects.sort();
562 let object_indices: FxHashMap<Id, usize> = objects
563 .iter()
564 .enumerate()
565 .map(|(i, id)| (*id, i))
566 .collect();
567
568 let mut context_ids = self.context_ids;
570 context_ids.sort();
571 let context_id_indices: FxHashMap<Id, usize> = context_ids
572 .iter()
573 .enumerate()
574 .map(|(i, id)| (*id, i))
575 .collect();
576
577 let mut contexts = self.contexts;
579 contexts.sort_by(|a, b| {
580 match a.root_id.cmp(&b.root_id) {
582 std::cmp::Ordering::Equal => {
583 let a_edges: Vec<_> = a.edges.iter().map(|e| (e.type_id, e.to_entity_id)).collect();
585 let b_edges: Vec<_> = b.edges.iter().map(|e| (e.type_id, e.to_entity_id)).collect();
586 a_edges.cmp(&b_edges)
587 }
588 other => other,
589 }
590 });
591 let context_indices: FxHashMap<Context, usize> = contexts
592 .iter()
593 .enumerate()
594 .map(|(i, ctx)| (ctx.clone(), i))
595 .collect();
596
597 Self {
598 properties,
599 property_indices,
600 relation_types,
601 relation_type_indices,
602 languages,
603 language_indices,
604 units,
605 unit_indices,
606 objects,
607 object_indices,
608 context_ids,
609 context_id_indices,
610 contexts,
611 context_indices,
612 }
613 }
614}
615
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created edit keeps its id and is otherwise empty.
    #[test]
    fn test_edit_new() {
        let edit_id = [1u8; 16];
        let edit = Edit::new(edit_id);

        assert_eq!(edit.id, edit_id);
        assert!(edit.name.is_empty());
        assert!(edit.authors.is_empty());
        assert!(edit.ops.is_empty());
    }

    /// Properties get consecutive indices and re-adding one returns its old index.
    #[test]
    fn test_dictionary_builder() {
        let first = [1u8; 16];
        let second = [2u8; 16];
        let mut builder = DictionaryBuilder::new();

        // First insertion of a property.
        assert_eq!(builder.add_property(first, DataType::Text), 0);
        // Same property again: no new entry.
        assert_eq!(builder.add_property(first, DataType::Text), 0);
        // A different property gets the next index.
        assert_eq!(builder.add_property(second, DataType::Int64), 1);

        let dicts = builder.build();
        assert_eq!(
            dicts.properties,
            vec![(first, DataType::Text), (second, DataType::Int64)]
        );
    }

    /// Languages use 1-based indices, with `0` reserved for "no language".
    #[test]
    fn test_language_indexing() {
        let english = [10u8; 16];
        let spanish = [20u8; 16];
        let mut builder = DictionaryBuilder::new();

        // `None` never occupies a slot.
        assert_eq!(builder.add_language(None), 0);
        // First real language.
        assert_eq!(builder.add_language(Some(english)), 1);
        // Re-adding keeps the index.
        assert_eq!(builder.add_language(Some(english)), 1);
        // Second real language.
        assert_eq!(builder.add_language(Some(spanish)), 2);

        let dicts = builder.build();
        assert_eq!(dicts.languages.len(), 2);

        // Lookup mirrors the builder's numbering.
        assert!(dicts.get_language(0).is_none());
        assert_eq!(dicts.get_language(1), Some(&english));
        assert_eq!(dicts.get_language(2), Some(&spanish));
    }
}