use std::borrow::Cow;
use rustc_hash::FxHashMap;
use crate::codec::primitives::Writer;
use crate::error::EncodeError;
use crate::limits::MAX_DICT_SIZE;
use crate::model::{DataType, Id, Op};
/// A single edge of a [`Context`]: an edge type id paired with an entity id.
///
/// When the owning context is handed to `DictionaryBuilder::add_context`, `type_id` is
/// registered in the relation-type dictionary and `to_entity_id` in the context-id dictionary.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ContextEdge {
/// Id of the edge's type; resolved through the relation-type dictionary when contexts are written.
pub type_id: Id,
/// Id of the edge's target entity; resolved through the context-id dictionary when contexts are written.
pub to_entity_id: Id,
}
/// A root id plus an ordered list of [`ContextEdge`]s.
///
/// Derives `Eq` and `Hash`, which lets `DictionaryBuilder` use whole contexts as hash-map keys
/// for de-duplication. Two contexts are equal only if the root and every edge (in order) match.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Context {
/// Id of the context's root; registered in the context-id dictionary by `add_context`.
pub root_id: Id,
/// Edges of the context, in the order they will be written.
pub edges: Vec<ContextEdge>,
}
/// An edit: an id, a name, author ids, a creation timestamp and a list of operations.
///
/// The lifetime `'a` allows `name` and the contained ops to borrow from an external buffer
/// instead of owning their data.
#[derive(Debug, Clone, PartialEq)]
pub struct Edit<'a> {
/// Identifier of this edit.
pub id: Id,
/// Name of the edit; may be borrowed or owned.
pub name: Cow<'a, str>,
/// Ids of the edit's authors.
pub authors: Vec<Id>,
/// Creation timestamp. NOTE(review): unit and epoch are not established in this file — confirm.
pub created_at: i64,
/// Operations carried by this edit.
pub ops: Vec<Op<'a>>,
}
impl<'a> Edit<'a> {
    /// Creates an edit with the given `id` and every other field empty:
    /// empty name, no authors, `created_at == 0`, no ops.
    pub fn new(id: Id) -> Self {
        // An empty `&'static str` converts to `Cow::Borrowed("")`.
        Self::with_name(id, "")
    }

    /// Creates an edit with the given `id` and `name`; authors and ops start empty and
    /// `created_at` starts at 0.
    ///
    /// `name` accepts anything convertible into `Cow<'a, str>` (e.g. `&'a str` or `String`).
    pub fn with_name(id: Id, name: impl Into<Cow<'a, str>>) -> Self {
        let name = name.into();
        let authors = Vec::new();
        let ops = Vec::new();
        Self {
            id,
            name,
            authors,
            created_at: 0,
            ops,
        }
    }
}
/// Index-addressed id dictionaries, as produced by `DictionaryBuilder::build` /
/// `DictionaryBuilder::as_wire_dicts`.
///
/// Most dictionaries are looked up with 0-based indices. `languages` and `units` are the
/// exception: their accessors treat index 0 as "none" and map index `n` to element `n - 1`.
#[derive(Debug, Clone, Default)]
pub struct WireDictionaries {
/// Property ids together with their data type.
pub properties: Vec<(Id, DataType)>,
/// Relation type ids.
pub relation_types: Vec<Id>,
/// Language ids (accessed with 1-based indices via `get_language`).
pub languages: Vec<Id>,
/// Unit ids (accessed with 1-based indices via `get_unit`).
pub units: Vec<Id>,
/// Object ids.
pub objects: Vec<Id>,
/// Ids referenced by contexts (roots and edge targets).
pub context_ids: Vec<Id>,
/// Full contexts.
pub contexts: Vec<Context>,
}
impl WireDictionaries {
    /// Creates a set of empty dictionaries.
    pub fn new() -> Self {
        Default::default()
    }

    /// Returns the `(id, data type)` pair at 0-based `index`, or `None` if out of range.
    pub fn get_property(&self, index: usize) -> Option<&(Id, DataType)> {
        let entries = &self.properties;
        entries.get(index)
    }

    /// Returns the relation type id at 0-based `index`, or `None` if out of range.
    pub fn get_relation_type(&self, index: usize) -> Option<&Id> {
        let entries = &self.relation_types;
        entries.get(index)
    }

    /// Returns the language id for 1-based `index`.
    ///
    /// Index 0 means "no language" and always yields `None`; index `n` maps to element `n - 1`.
    pub fn get_language(&self, index: usize) -> Option<&Id> {
        // `checked_sub` is `None` exactly when `index == 0`.
        let slot = index.checked_sub(1)?;
        self.languages.get(slot)
    }

    /// Returns the unit id for 1-based `index`.
    ///
    /// Index 0 means "no unit" and always yields `None`; index `n` maps to element `n - 1`.
    pub fn get_unit(&self, index: usize) -> Option<&Id> {
        // `checked_sub` is `None` exactly when `index == 0`.
        let slot = index.checked_sub(1)?;
        self.units.get(slot)
    }

    /// Returns the object id at 0-based `index`, or `None` if out of range.
    pub fn get_object(&self, index: usize) -> Option<&Id> {
        let entries = &self.objects;
        entries.get(index)
    }

    /// Returns the context id at 0-based `index`, or `None` if out of range.
    pub fn get_context_id(&self, index: usize) -> Option<&Id> {
        let entries = &self.context_ids;
        entries.get(index)
    }

    /// Returns the context at 0-based `index`, or `None` if out of range.
    pub fn get_context(&self, index: usize) -> Option<&Context> {
        let entries = &self.contexts;
        entries.get(index)
    }
}
/// Incrementally builds de-duplicated dictionaries.
///
/// Every dictionary is stored twice: a `Vec` holding the entries in index order, and a hash
/// map from entry to its 0-based position in that `Vec`, used to de-duplicate on insert.
/// For languages and units the maps also hold 0-based positions; the `+ 1` offset (index 0
/// meaning "none") is applied by the accessor methods, not stored.
#[derive(Debug, Clone, Default)]
pub struct DictionaryBuilder {
// Property ids with their data type, and id -> position.
properties: Vec<(Id, DataType)>,
property_indices: FxHashMap<Id, usize>,
// Relation type ids, and id -> position.
relation_types: Vec<Id>,
relation_type_indices: FxHashMap<Id, usize>,
// Language ids, and id -> 0-based position (public indices are position + 1).
languages: Vec<Id>,
language_indices: FxHashMap<Id, usize>,
// Unit ids, and id -> 0-based position (public indices are position + 1).
units: Vec<Id>,
unit_indices: FxHashMap<Id, usize>,
// Object ids, and id -> position.
objects: Vec<Id>,
object_indices: FxHashMap<Id, usize>,
// Ids referenced by contexts (roots and edge targets), and id -> position.
context_ids: Vec<Id>,
context_id_indices: FxHashMap<Id, usize>,
// Whole contexts, and context -> position.
contexts: Vec<Context>,
context_indices: FxHashMap<Context, usize>,
}
impl DictionaryBuilder {
pub fn new() -> Self {
Self::default()
}
pub fn with_capacity(estimated_ops: usize) -> Self {
let prop_cap = estimated_ops / 4 + 1;
let rel_cap = estimated_ops / 20 + 1;
let lang_cap = 4;
let unit_cap = 4;
let obj_cap = estimated_ops / 2 + 1;
let ctx_id_cap = 8;
let ctx_cap = 4;
Self {
properties: Vec::with_capacity(prop_cap),
property_indices: FxHashMap::with_capacity_and_hasher(prop_cap, Default::default()),
relation_types: Vec::with_capacity(rel_cap),
relation_type_indices: FxHashMap::with_capacity_and_hasher(rel_cap, Default::default()),
languages: Vec::with_capacity(lang_cap),
language_indices: FxHashMap::with_capacity_and_hasher(lang_cap, Default::default()),
units: Vec::with_capacity(unit_cap),
unit_indices: FxHashMap::with_capacity_and_hasher(unit_cap, Default::default()),
objects: Vec::with_capacity(obj_cap),
object_indices: FxHashMap::with_capacity_and_hasher(obj_cap, Default::default()),
context_ids: Vec::with_capacity(ctx_id_cap),
context_id_indices: FxHashMap::with_capacity_and_hasher(ctx_id_cap, Default::default()),
contexts: Vec::with_capacity(ctx_cap),
context_indices: FxHashMap::with_capacity_and_hasher(ctx_cap, Default::default()),
}
}
/// Returns the 0-based dictionary index of property `id`, appending it if it is new.
///
/// If `id` is already present its existing index is returned and the stored data type is
/// kept; the `data_type` argument is ignored in that case.
pub fn add_property(&mut self, id: Id, data_type: DataType) -> usize {
    use std::collections::hash_map::Entry;

    // A single entry lookup replaces the previous `get` followed by `insert`,
    // so the key is hashed only once on a miss.
    match self.property_indices.entry(id) {
        Entry::Occupied(known) => *known.get(),
        Entry::Vacant(vacant) => {
            let slot = self.properties.len();
            self.properties.push((id, data_type));
            vacant.insert(slot);
            slot
        }
    }
}
/// Returns the 0-based dictionary index of relation type `id`, appending it if it is new.
pub fn add_relation_type(&mut self, id: Id) -> usize {
    use std::collections::hash_map::Entry;

    // A single entry lookup replaces the previous `get` followed by `insert`,
    // so the key is hashed only once on a miss.
    match self.relation_type_indices.entry(id) {
        Entry::Occupied(known) => *known.get(),
        Entry::Vacant(vacant) => {
            let slot = self.relation_types.len();
            self.relation_types.push(id);
            vacant.insert(slot);
            slot
        }
    }
}
/// Returns the 1-based dictionary index of a language, appending it if it is new.
///
/// `None` maps to index 0 and leaves the dictionary untouched. `Some(id)` maps to the
/// id's position in the language list plus one.
pub fn add_language(&mut self, id: Option<Id>) -> usize {
    use std::collections::hash_map::Entry;

    let lang_id = match id {
        Some(lang_id) => lang_id,
        None => return 0,
    };
    // A single entry lookup replaces the previous `get` followed by `insert`,
    // so the key is hashed only once on a miss.
    let slot = match self.language_indices.entry(lang_id) {
        Entry::Occupied(known) => *known.get(),
        Entry::Vacant(vacant) => {
            let slot = self.languages.len();
            self.languages.push(lang_id);
            vacant.insert(slot);
            slot
        }
    };
    // The map stores 0-based positions; index 0 is reserved for "no language".
    slot + 1
}
/// Returns the 1-based dictionary index of a unit, appending it if it is new.
///
/// `None` maps to index 0 and leaves the dictionary untouched. `Some(id)` maps to the
/// id's position in the unit list plus one.
pub fn add_unit(&mut self, id: Option<Id>) -> usize {
    use std::collections::hash_map::Entry;

    let unit_id = match id {
        Some(unit_id) => unit_id,
        None => return 0,
    };
    // A single entry lookup replaces the previous `get` followed by `insert`,
    // so the key is hashed only once on a miss.
    let slot = match self.unit_indices.entry(unit_id) {
        Entry::Occupied(known) => *known.get(),
        Entry::Vacant(vacant) => {
            let slot = self.units.len();
            self.units.push(unit_id);
            vacant.insert(slot);
            slot
        }
    };
    // The map stores 0-based positions; index 0 is reserved for "no unit".
    slot + 1
}
/// Returns the 0-based dictionary index of object `id`, appending it if it is new.
pub fn add_object(&mut self, id: Id) -> usize {
    use std::collections::hash_map::Entry;

    // A single entry lookup replaces the previous `get` followed by `insert`,
    // so the key is hashed only once on a miss.
    match self.object_indices.entry(id) {
        Entry::Occupied(known) => *known.get(),
        Entry::Vacant(vacant) => {
            let slot = self.objects.len();
            self.objects.push(id);
            vacant.insert(slot);
            slot
        }
    }
}
/// Returns the 0-based index of `id` in the context-id dictionary, appending it if it is new.
pub fn add_context_id(&mut self, id: Id) -> usize {
    use std::collections::hash_map::Entry;

    // A single entry lookup replaces the previous `get` followed by `insert`,
    // so the key is hashed only once on a miss.
    match self.context_id_indices.entry(id) {
        Entry::Occupied(known) => *known.get(),
        Entry::Vacant(vacant) => {
            let slot = self.context_ids.len();
            self.context_ids.push(id);
            vacant.insert(slot);
            slot
        }
    }
}
pub fn add_context(&mut self, context: &Context) -> usize {
if let Some(&idx) = self.context_indices.get(context) {
idx
} else {
self.add_context_id(context.root_id);
for edge in &context.edges {
self.add_relation_type(edge.type_id);
self.add_context_id(edge.to_entity_id);
}
let idx = self.contexts.len();
self.contexts.push(context.clone());
self.context_indices.insert(context.clone(), idx);
idx
}
}
/// Returns the 0-based index of `context`, or `None` if it has not been added.
pub fn get_context_index(&self, context: &Context) -> Option<usize> {
    let &slot = self.context_indices.get(context)?;
    Some(slot)
}
pub fn build(self) -> WireDictionaries {
WireDictionaries {
properties: self.properties,
relation_types: self.relation_types,
languages: self.languages,
units: self.units,
objects: self.objects,
context_ids: self.context_ids,
contexts: self.contexts,
}
}
pub fn as_wire_dicts(&self) -> WireDictionaries {
WireDictionaries {
properties: self.properties.clone(),
relation_types: self.relation_types.clone(),
languages: self.languages.clone(),
units: self.units.clone(),
objects: self.objects.clone(),
context_ids: self.context_ids.clone(),
contexts: self.contexts.clone(),
}
}
/// Returns the 0-based index of property `id`, or `None` if it has not been added.
pub fn get_property_index(&self, id: &Id) -> Option<usize> {
    let &slot = self.property_indices.get(id)?;
    Some(slot)
}
/// Returns the 0-based index of relation type `id`, or `None` if it has not been added.
pub fn get_relation_type_index(&self, id: &Id) -> Option<usize> {
    let &slot = self.relation_type_indices.get(id)?;
    Some(slot)
}
/// Returns the 1-based index of a language.
///
/// `None` (no language) is always index 0. `Some(id)` yields the id's position plus one,
/// or `None` if that language has not been added.
pub fn get_language_index(&self, id: Option<&Id>) -> Option<usize> {
    if let Some(lang_id) = id {
        let &slot = self.language_indices.get(lang_id)?;
        // Stored positions are 0-based; index 0 is reserved for "no language".
        Some(slot + 1)
    } else {
        Some(0)
    }
}
/// Returns the 0-based index of object `id`, or `None` if it has not been added.
pub fn get_object_index(&self, id: &Id) -> Option<usize> {
    let &slot = self.object_indices.get(id)?;
    Some(slot)
}
/// Returns the 0-based index of `id` in the context-id dictionary, or `None` if absent.
pub fn get_context_id_index(&self, id: &Id) -> Option<usize> {
    let &slot = self.context_id_indices.get(id)?;
    Some(slot)
}
/// Writes all id dictionaries (everything except the contexts themselves) to `writer`.
///
/// Layout, in order: the property count as a varint followed by `(id, data type byte)`
/// for each property, then the relation-type, language, unit, object and context-id lists,
/// each written with `Writer::write_id_vec`.
pub fn write_dictionaries(&self, writer: &mut Writer) {
    let property_count = self.properties.len() as u64;
    writer.write_varint(property_count);
    self.properties.iter().for_each(|(id, data_type)| {
        writer.write_id(id);
        // The data type is emitted as its enum discriminant, truncated to one byte.
        writer.write_byte(*data_type as u8);
    });

    // The order of these lists is part of the output layout.
    let id_lists = [
        &self.relation_types,
        &self.languages,
        &self.units,
        &self.objects,
        &self.context_ids,
    ];
    for ids in id_lists {
        writer.write_id_vec(ids);
    }
}
pub fn write_contexts(&self, writer: &mut Writer) {
writer.write_varint(self.contexts.len() as u64);
for ctx in &self.contexts {
let root_idx = self.context_id_indices.get(&ctx.root_id)
.copied()
.expect("context root_id must be in context_ids dictionary");
writer.write_varint(root_idx as u64);
writer.write_varint(ctx.edges.len() as u64);
for edge in &ctx.edges {
let type_idx = self.relation_type_indices.get(&edge.type_id)
.copied()
.expect("context edge type_id must be in relation_types dictionary");
let to_idx = self.context_id_indices.get(&edge.to_entity_id)
.copied()
.expect("context edge to_entity_id must be in context_ids dictionary");
writer.write_varint(type_idx as u64);
writer.write_varint(to_idx as u64);
}
}
}
/// Checks that no dictionary, and no context's edge list, holds more than
/// `MAX_DICT_SIZE` entries.
///
/// # Errors
///
/// Returns `EncodeError::LengthExceedsLimit` for the first violation found. Dictionaries
/// are checked in the order properties, relation_types, languages, units, objects,
/// context_ids, contexts; edge lists are checked afterwards, in context order.
pub fn validate_limits(&self) -> Result<(), EncodeError> {
    let max = MAX_DICT_SIZE;
    let within_limit = |field: &'static str, len: usize| {
        if len > max {
            Err(EncodeError::LengthExceedsLimit { field, len, max })
        } else {
            Ok(())
        }
    };

    let dictionary_sizes = [
        ("properties", self.properties.len()),
        ("relation_types", self.relation_types.len()),
        ("languages", self.languages.len()),
        ("units", self.units.len()),
        ("objects", self.objects.len()),
        ("context_ids", self.context_ids.len()),
        ("contexts", self.contexts.len()),
    ];
    for &(field, len) in dictionary_sizes.iter() {
        within_limit(field, len)?;
    }

    for ctx in &self.contexts {
        within_limit("context_edges", ctx.edges.len())?;
    }
    Ok(())
}
/// Consumes the builder and returns one whose dictionaries are sorted, with every lookup
/// map rebuilt to match the new positions.
///
/// Id dictionaries are sorted ascending by id (properties by their id only). Contexts are
/// sorted by root id, then lexicographically by their `(type_id, to_entity_id)` edge list.
///
/// Indices returned by the `add_*` / `get_*_index` methods before this call are not valid
/// for the sorted builder and must be looked up again.
pub fn into_sorted(self) -> Self {
    // Maps every id of an already-sorted list to its position.
    fn positions(ids: &[Id]) -> FxHashMap<Id, usize> {
        ids.iter()
            .copied()
            .enumerate()
            .map(|(slot, id)| (id, slot))
            .collect()
    }

    let Self {
        mut properties,
        mut relation_types,
        mut languages,
        mut units,
        mut objects,
        mut context_ids,
        mut contexts,
        ..
    } = self;

    properties.sort_by_key(|&(id, _)| id);
    let property_indices: FxHashMap<Id, usize> = properties
        .iter()
        .enumerate()
        .map(|(slot, &(id, _))| (id, slot))
        .collect();

    // The `add_*` methods never store the same id twice, so stability is irrelevant
    // and the non-allocating unstable sort gives the same order.
    relation_types.sort_unstable();
    languages.sort_unstable();
    units.sort_unstable();
    objects.sort_unstable();
    context_ids.sort_unstable();

    let relation_type_indices = positions(&relation_types);
    // Language and unit maps keep 0-based positions; accessors add the +1 offset.
    let language_indices = positions(&languages);
    let unit_indices = positions(&units);
    let object_indices = positions(&objects);
    let context_id_indices = positions(&context_ids);

    // Compare edge lists lazily instead of collecting two temporary `Vec`s per
    // comparison. `Iterator::cmp` is lexicographic, exactly like comparing the
    // collected vectors. Contexts that compare equal are identical in every field,
    // so an unstable sort cannot change the result.
    contexts.sort_unstable_by(|a, b| {
        a.root_id.cmp(&b.root_id).then_with(|| {
            let a_edges = a.edges.iter().map(|e| (e.type_id, e.to_entity_id));
            let b_edges = b.edges.iter().map(|e| (e.type_id, e.to_entity_id));
            a_edges.cmp(b_edges)
        })
    });
    let context_indices: FxHashMap<Context, usize> = contexts
        .iter()
        .cloned()
        .enumerate()
        .map(|(slot, ctx)| (ctx, slot))
        .collect();

    Self {
        properties,
        property_indices,
        relation_types,
        relation_type_indices,
        languages,
        language_indices,
        units,
        unit_indices,
        objects,
        object_indices,
        context_ids,
        context_id_indices,
        contexts,
        context_indices,
    }
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `Edit::new` keeps the given id and leaves name, authors and ops empty.
    #[test]
    fn test_edit_new() {
        let edit_id = [1u8; 16];
        let Edit {
            id,
            name,
            authors,
            ops,
            ..
        } = Edit::new(edit_id);

        assert_eq!(id, edit_id);
        assert!(name.is_empty());
        assert!(authors.is_empty());
        assert!(ops.is_empty());
    }

    /// Adding a property twice returns the same index; new properties get the next index.
    #[test]
    fn test_dictionary_builder() {
        let (text_prop, int_prop) = ([1u8; 16], [2u8; 16]);
        let mut dict_builder = DictionaryBuilder::new();

        let first = dict_builder.add_property(text_prop, DataType::Text);
        assert_eq!(first, 0);
        let repeated = dict_builder.add_property(text_prop, DataType::Text);
        assert_eq!(repeated, 0);
        let second = dict_builder.add_property(int_prop, DataType::Integer);
        assert_eq!(second, 1);

        let WireDictionaries { properties, .. } = dict_builder.build();
        assert_eq!(properties.len(), 2);
        assert_eq!(properties[0], (text_prop, DataType::Text));
        assert_eq!(properties[1], (int_prop, DataType::Integer));
    }

    /// Language indices are 1-based, with 0 reserved for "no language".
    #[test]
    fn test_language_indexing() {
        let (lang_a, lang_b) = ([10u8; 16], [20u8; 16]);
        let mut dict_builder = DictionaryBuilder::new();

        let none_idx = dict_builder.add_language(None);
        assert_eq!(none_idx, 0);
        let a_idx = dict_builder.add_language(Some(lang_a));
        assert_eq!(a_idx, 1);
        let a_again_idx = dict_builder.add_language(Some(lang_a));
        assert_eq!(a_again_idx, 1);
        let b_idx = dict_builder.add_language(Some(lang_b));
        assert_eq!(b_idx, 2);

        let wire = dict_builder.build();
        assert_eq!(wire.languages.len(), 2);
        assert!(wire.get_language(0).is_none());
        assert_eq!(wire.get_language(1), Some(&lang_a));
        assert_eq!(wire.get_language(2), Some(&lang_b));
    }
}