use std::{
collections::BTreeMap,
convert::{TryFrom, TryInto},
};
use crate::document::*;
use crate::entry::*;
pub use compress::*;
use element::Parser;
use query::{NewQuery, Query};
use crate::error::{Error, Result};
use crate::validator::{Checklist, DataChecklist, Validator};
use crate::*;
use serde::{Deserialize, Serialize};
#[inline]
fn compress_is_default(val: &Compress) -> bool {
    // Serde skip-helper: true only for the default general compression,
    // i.e. zstd at level 3. All other settings must be serialized.
    matches!(
        val,
        Compress::General { algorithm, level } if *algorithm == ALGORITHM_ZSTD && *level == 3
    )
}
#[inline]
fn int_is_zero(v: &Integer) -> bool {
    // Serde skip-helper: omit the field when the integer is exactly zero.
    // Values that don't fit in a u64 (as_u64 == None) are never "zero".
    matches!(v.as_u64(), Some(0))
}
#[inline]
fn u8_is_zero(v: &u8) -> bool {
    // Serde skip-helper: omit the field when it is zero.
    0 == *v
}
/// Serialized form of a schema document: the top-level document validator plus
/// optional metadata, per-entry validators, shared named types, and
/// compression defaults. Fields at their defaults are omitted on the wire.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
struct InnerSchema {
    /// Validator applied to documents adhering to this schema.
    doc: Validator,
    /// Human-readable description of the schema (optional).
    #[serde(skip_serializing_if = "String::is_empty", default)]
    description: String,
    /// Default compression used when encoding documents under this schema.
    #[serde(skip_serializing_if = "compress_is_default", default)]
    doc_compress: Compress,
    /// Validator + compression settings for each permitted entry key.
    #[serde(skip_serializing_if = "BTreeMap::is_empty", default)]
    entries: BTreeMap<String, EntrySchema>,
    /// Human-readable schema name (optional).
    #[serde(skip_serializing_if = "String::is_empty", default)]
    name: String,
    /// Named validators that other validators in the schema may reference.
    #[serde(skip_serializing_if = "BTreeMap::is_empty", default)]
    types: BTreeMap<String, Validator>,
    /// Schema version number (optional; defaults to zero).
    #[serde(skip_serializing_if = "int_is_zero", default)]
    version: Integer,
    /// Maximum number of regexes allowed in a query against this schema.
    #[serde(skip_serializing_if = "u8_is_zero", default)]
    max_regex: u8,
}
/// Validation and compression settings for a single entry key in a schema.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
struct EntrySchema {
    /// Validator applied to entries stored under this key.
    entry: Validator,
    /// Default compression used when encoding entries under this key.
    #[serde(skip_serializing_if = "compress_is_default", default)]
    compress: Compress,
}
/// Encoder/decoder for documents that do not adhere to any schema.
pub struct NoSchema;
impl NoSchema {
    /// Validate a schema-less [`NewDocument`], turning it into a [`Document`].
    ///
    /// Fails if the document claims a schema, or if its data is not valid
    /// fog-pack (checked with the permissive `Validator::Any`).
    pub fn validate_new_doc(doc: NewDocument) -> Result<Document> {
        // A schema-less codec must refuse any document that names a schema.
        if let Some(schema) = doc.schema_hash() {
            return Err(Error::SchemaMismatch {
                actual: Some(schema.to_owned()),
                expected: None,
            });
        }
        // Outside a schema there are no named types, so use an empty type map.
        let types = BTreeMap::new();
        let parser = Parser::new(doc.data());
        let (parser, _) = Validator::Any.validate(&types, parser, None)?;
        // finish() ensures the parser consumed the entire payload.
        parser.finish()?;
        Ok(Document::from_new(doc))
    }

    /// Encode a schema-less [`Document`] to bytes, returning its hash and the
    /// (possibly compressed) byte vector.
    ///
    /// The document's own compression override takes precedence; otherwise the
    /// default general compression (algorithm 0 at level 3) is used.
    pub fn encode_doc(doc: Document) -> Result<(Hash, Vec<u8>)> {
        if let Some(schema) = doc.schema_hash() {
            return Err(Error::SchemaMismatch {
                actual: Some(schema.to_owned()),
                expected: None,
            });
        }
        // `complete()` yields the hash, raw bytes, and the per-document
        // compression override: None = use default, Some(None) = no
        // compression, Some(Some(level)) = general compression at `level`.
        let (hash, doc, compression) = doc.complete();
        let compression = match compression {
            None => Compress::General {
                // NOTE(review): algorithm 0 is presumably ALGORITHM_ZSTD, to
                // match compress_is_default — confirm the constant's value.
                algorithm: 0,
                level: 3,
            },
            Some(None) => Compress::None,
            Some(Some(level)) => Compress::General {
                algorithm: 0,
                level,
            },
        };
        Ok((hash, compress_doc(doc, &compression)))
    }

    /// Decode and fully validate a schema-less document from raw bytes.
    ///
    /// Fails if the bytes carry a schema hash, fail to decompress, or are not
    /// valid fog-pack data.
    pub fn decode_doc(doc: Vec<u8>) -> Result<Document> {
        let split = SplitDoc::split(&doc)?;
        // A non-empty hash region means the raw bytes claim a schema.
        if !split.hash_raw.is_empty() {
            return Err(Error::SchemaMismatch {
                actual: split.hash_raw.try_into().ok(),
                expected: None,
            });
        }
        // The marker byte inside the data decides whether decompression
        // actually runs; the Compress::None argument here only supplies
        // settings — TODO confirm against Compress::decompress.
        let doc = Document::new(decompress_doc(doc, &Compress::None)?)?;
        let types = BTreeMap::new();
        let parser = Parser::new(doc.data());
        let (parser, _) = Validator::Any.validate(&types, parser, None)?;
        parser.finish()?;
        Ok(doc)
    }

    /// Decode a schema-less document from raw bytes *without* validating its
    /// contents. Only use on bytes from a trusted source (e.g. already
    /// validated once before storage).
    pub fn trusted_decode_doc(doc: Vec<u8>) -> Result<Document> {
        let split = SplitDoc::split(&doc)?;
        if !split.hash_raw.is_empty() {
            return Err(Error::SchemaMismatch {
                actual: split.hash_raw.try_into().ok(),
                expected: None,
            });
        }
        let doc = Document::new(decompress_doc(doc, &Compress::None)?)?;
        Ok(doc)
    }
}
/// Compress an encoded document's data section, returning the new byte vector.
///
/// Returns the input unchanged when `compression` is `Compress::None`, or when
/// the compressor reports failure (best-effort behavior).
fn compress_doc(doc: Vec<u8>, compression: &Compress) -> Vec<u8> {
    if let Compress::None = compression {
        return doc;
    }
    // `doc` is a complete, already-validated document, so splitting can't fail.
    let split = SplitDoc::split(&doc).unwrap();
    let header_len = doc.len() - split.data.len() - split.signature_raw.len();
    // Reserve for the worst-case compressed size so no reallocation occurs.
    let max_len = zstd_safe::compress_bound(split.data.len());
    let mut compress = Vec::with_capacity(doc.len() + max_len - split.data.len());
    // Copy the header verbatim; its marker and length bytes are patched below.
    compress.extend_from_slice(&doc[..header_len]);
    match compression.compress(compress, split.data) {
        Ok(mut compress) => {
            // Patch the header: byte 0 holds the compression marker, and the
            // 3 bytes immediately before the data hold the little-endian data
            // length (the same layout decompress_doc writes back).
            let data_len = (compress.len() - header_len).to_le_bytes();
            compress[0] = CompressType::type_of(compression).into();
            compress[header_len - 3] = data_len[0];
            compress[header_len - 2] = data_len[1];
            compress[header_len - 1] = data_len[2];
            // The signature is appended uncompressed, after the data.
            compress.extend_from_slice(split.signature_raw);
            compress
        }
        // Compression failed (e.g. incompressible data): keep the original.
        Err(()) => doc,
    }
}
/// Decompress an encoded document's data section, returning the new byte
/// vector with its header rewritten to the uncompressed layout.
///
/// The embedded marker byte decides whether decompression runs at all; an
/// unrecognized marker is a header error. Output size is capped at
/// MAX_DOC_SIZE.
fn decompress_doc(compress: Vec<u8>, compression: &Compress) -> Result<Vec<u8>> {
    let split = SplitDoc::split(&compress)?;
    let marker = CompressType::try_from(split.compress_raw)
        .map_err(|m| Error::BadHeader(format!("unrecognized compression marker 0x{:x}", m)))?;
    // Already uncompressed: nothing to do.
    if let CompressType::NoCompress = marker {
        return Ok(compress);
    }
    let header_len = compress.len() - split.data.len() - split.signature_raw.len();
    let mut doc = Vec::new();
    // Start from a copy of the header; marker and length are patched below.
    doc.extend_from_slice(&compress[..header_len]);
    let mut doc = compression.decompress(
        doc,
        split.data,
        marker,
        split.signature_raw.len(),
        MAX_DOC_SIZE,
    )?;
    // Rewrite the header for uncompressed data: marker byte 0 plus a 3-byte
    // little-endian data length just before the data (mirrors compress_doc).
    let data_len = (doc.len() - header_len).to_le_bytes();
    doc[0] = CompressType::NoCompress.into();
    doc[header_len - 3] = data_len[0];
    doc[header_len - 2] = data_len[1];
    doc[header_len - 1] = data_len[2];
    doc.extend_from_slice(split.signature_raw);
    Ok(doc)
}
/// Compress an encoded entry's data section, returning the new byte vector.
///
/// Entry counterpart of `compress_doc`: entries have a fixed-size prefix of
/// ENTRY_PREFIX_LEN bytes (marker byte plus a 2-byte little-endian data
/// length) instead of a document header. Returns the input unchanged when
/// `compression` is `Compress::None` or the compressor fails.
fn compress_entry(entry: Vec<u8>, compression: &Compress) -> Vec<u8> {
    if let Compress::None = compression {
        return entry;
    }
    // `entry` is a complete, already-validated entry, so splitting can't fail.
    let split = SplitEntry::split(&entry).unwrap();
    // Reserve for the worst-case compressed size so no reallocation occurs.
    let max_len = zstd_safe::compress_bound(split.data.len());
    let mut compress = Vec::with_capacity(entry.len() + max_len - split.data.len());
    compress.extend_from_slice(&entry[..ENTRY_PREFIX_LEN]);
    match compression.compress(compress, split.data) {
        Ok(mut compress) => {
            // Patch the prefix: marker at byte 0, 2-byte LE length at bytes 1-2
            // (the same layout decompress_entry writes back).
            let data_len = (compress.len() - ENTRY_PREFIX_LEN).to_le_bytes();
            compress[0] = CompressType::type_of(compression).into();
            compress[1] = data_len[0];
            compress[2] = data_len[1];
            compress.extend_from_slice(split.signature_raw);
            compress
        }
        // Compression failed: keep the original, uncompressed entry.
        Err(()) => entry,
    }
}
/// Decompress an encoded entry's data section, returning the new byte vector
/// with its prefix rewritten to the uncompressed layout.
///
/// Entry counterpart of `decompress_doc`. Output size is capped at
/// MAX_ENTRY_SIZE.
fn decompress_entry(compress: Vec<u8>, compression: &Compress) -> Result<Vec<u8>> {
    let split = SplitEntry::split(&compress)?;
    let marker = CompressType::try_from(split.compress_raw)
        .map_err(|m| Error::BadHeader(format!("unrecognized compression marker 0x{:x}", m)))?;
    // Already uncompressed: nothing to do.
    if let CompressType::NoCompress = marker {
        return Ok(compress);
    }
    let mut entry = Vec::new();
    // Start from a copy of the prefix; marker and length are patched below.
    entry.extend_from_slice(&compress[..ENTRY_PREFIX_LEN]);
    let mut entry = compression.decompress(
        entry,
        split.data,
        marker,
        split.signature_raw.len(),
        MAX_ENTRY_SIZE,
    )?;
    // Rewrite the prefix for uncompressed data: marker at byte 0 plus a 2-byte
    // little-endian data length at bytes 1-2 (mirrors compress_entry).
    let data_len = (entry.len() - ENTRY_PREFIX_LEN).to_le_bytes();
    entry[0] = CompressType::NoCompress.into();
    entry[1] = data_len[0];
    entry[2] = data_len[1];
    entry.extend_from_slice(split.signature_raw);
    Ok(entry)
}
/// Builder for assembling a schema document, finished with
/// [`SchemaBuilder::build`].
pub struct SchemaBuilder {
    inner: InnerSchema,
}
impl SchemaBuilder {
    /// Start building a schema, with `doc` as the validator for documents
    /// adhering to it. Everything else begins at its default.
    pub fn new(doc: Validator) -> Self {
        let inner = InnerSchema {
            doc,
            description: String::new(),
            doc_compress: Compress::default(),
            entries: BTreeMap::new(),
            name: String::new(),
            types: BTreeMap::new(),
            version: Integer::default(),
            max_regex: 0,
        };
        Self { inner }
    }

    /// Set the schema's human-readable description.
    pub fn description(mut self, description: &str) -> Self {
        self.inner.description = String::from(description);
        self
    }

    /// Set the default compression used for documents under this schema.
    pub fn doc_compress(mut self, doc_compress: Compress) -> Self {
        self.inner.doc_compress = doc_compress;
        self
    }

    /// Register an entry key with its validator and optional compression
    /// (default compression is used when `compress` is `None`).
    pub fn entry_add(
        mut self,
        entry: &str,
        validator: Validator,
        compress: Option<Compress>,
    ) -> Self {
        let schema = EntrySchema {
            entry: validator,
            compress: compress.unwrap_or_default(),
        };
        self.inner.entries.insert(String::from(entry), schema);
        self
    }

    /// Set the schema's human-readable name.
    pub fn name(mut self, name: &str) -> Self {
        self.inner.name = String::from(name);
        self
    }

    /// Register a named type that validators in the schema may reference.
    pub fn type_add(mut self, type_ref: &str, validator: Validator) -> Self {
        self.inner.types.insert(String::from(type_ref), validator);
        self
    }

    /// Set the schema's version number.
    pub fn version<T: Into<Integer>>(mut self, version: T) -> Self {
        self.inner.version = version.into();
        self
    }

    /// Finish building: serialize the schema into a schema-less document and
    /// validate it.
    pub fn build(self) -> Result<Document> {
        NoSchema::validate_new_doc(NewDocument::new(self.inner, None)?)
    }
}
/// A parsed schema: validates, encodes, and decodes documents, entries, and
/// queries that adhere to it. Identified by the hash of its schema document.
pub struct Schema {
    hash: Hash,
    inner: InnerSchema,
}
impl Schema {
pub fn from_doc(doc: &Document) -> Result<Self> {
let inner = doc.deserialize()?;
let hash = doc.hash();
Ok(Self { hash, inner })
}
pub fn hash(&self) -> &Hash {
&self.hash
}
pub fn validate_new_doc(&self, doc: NewDocument) -> Result<Document> {
match doc.schema_hash() {
Some(hash) if hash == &self.hash => (),
actual => {
return Err(Error::SchemaMismatch {
actual: actual.cloned(),
expected: None,
})
}
}
let parser = Parser::new(doc.data());
let (parser, _) = self.inner.doc.validate(&self.inner.types, parser, None)?;
parser.finish()?;
Ok(Document::from_new(doc))
}
pub fn encode_doc(&self, doc: Document) -> Result<(Hash, Vec<u8>)> {
match doc.schema_hash() {
Some(hash) if hash == &self.hash => (),
actual => {
return Err(Error::SchemaMismatch {
actual: actual.cloned(),
expected: None,
})
}
}
let (hash, doc, compression) = doc.complete();
let doc = match compression {
None => compress_doc(doc, &self.inner.doc_compress),
Some(None) => doc,
Some(Some(level)) => compress_doc(
doc,
&Compress::General {
algorithm: 0,
level,
},
),
};
Ok((hash, doc))
}
fn check_schema(&self, doc: &[u8]) -> Result<()> {
let split = SplitDoc::split(doc)?;
if split.hash_raw.is_empty() {
return Err(Error::SchemaMismatch {
actual: None,
expected: Some(self.hash.clone()),
});
}
let schema = Hash::try_from(split.hash_raw)
.map_err(|_| Error::BadHeader("Unable to decode schema hash".into()))?;
if schema != self.hash {
Err(Error::SchemaMismatch {
actual: Some(schema),
expected: Some(self.hash.clone()),
})
} else {
Ok(())
}
}
pub fn decode_doc(&self, doc: Vec<u8>) -> Result<Document> {
self.check_schema(&doc)?;
let doc = Document::new(decompress_doc(doc, &self.inner.doc_compress)?)?;
let parser = Parser::new(doc.data());
let (parser, _) = self.inner.doc.validate(&self.inner.types, parser, None)?;
parser.finish()?;
Ok(doc)
}
pub fn trusted_decode_doc(&self, doc: Vec<u8>) -> Result<Document> {
self.check_schema(&doc)?;
let doc = Document::new(decompress_doc(doc, &Compress::None)?)?;
Ok(doc)
}
pub fn encode_new_entry(&self, entry: NewEntry) -> Result<DataChecklist<(Hash, Vec<u8>)>> {
let parser = Parser::new(entry.data());
let entry_schema = self.inner.entries.get(entry.key()).ok_or_else(|| {
Error::FailValidate(format!("entry key \"{:?}\" is not in schema", entry.key()))
})?;
let checklist = Some(Checklist::new(&self.hash, &self.inner.types));
let (parser, checklist) =
entry_schema
.entry
.validate(&self.inner.types, parser, checklist)?;
parser.finish()?;
let (hash, entry, compression) = entry.complete();
let entry = match compression {
None => compress_entry(entry, &entry_schema.compress),
Some(None) => entry,
Some(Some(level)) => compress_doc(
entry,
&Compress::General {
algorithm: 0,
level,
},
),
};
Ok(DataChecklist::from_checklist(
checklist.unwrap(),
(hash, entry),
))
}
pub fn encode_entry(&self, entry: Entry) -> Result<DataChecklist<(Hash, Vec<u8>)>> {
let parser = Parser::new(entry.data());
let entry_schema = self.inner.entries.get(entry.key()).ok_or_else(|| {
Error::FailValidate(format!("entry key \"{:?}\" is not in schema", entry.key()))
})?;
let checklist = Some(Checklist::new(&self.hash, &self.inner.types));
let (parser, checklist) =
entry_schema
.entry
.validate(&self.inner.types, parser, checklist)?;
parser.finish()?;
let (hash, entry, compression) = entry.complete();
let entry = match compression {
None => compress_entry(entry, &entry_schema.compress),
Some(None) => entry,
Some(Some(level)) => compress_doc(
entry,
&Compress::General {
algorithm: 0,
level,
},
),
};
Ok(DataChecklist::from_checklist(
checklist.unwrap(),
(hash, entry),
))
}
pub fn decode_entry(
&self,
entry: Vec<u8>,
key: &str,
parent: &Hash,
) -> Result<DataChecklist<Entry>> {
let entry_schema = self.inner.entries.get(key).ok_or_else(|| {
Error::FailValidate(format!("entry key \"{:?}\" is not in schema", key))
})?;
let entry = Entry::new(
decompress_entry(entry, &entry_schema.compress)?,
key,
parent,
)?;
let parser = Parser::new(entry.data());
let checklist = Some(Checklist::new(&self.hash, &self.inner.types));
let (parser, checklist) =
entry_schema
.entry
.validate(&self.inner.types, parser, checklist)?;
parser.finish()?;
Ok(DataChecklist::from_checklist(checklist.unwrap(), entry))
}
pub fn trusted_decode_entry(&self, entry: Vec<u8>, key: &str, parent: &Hash) -> Result<Entry> {
let entry_schema = self.inner.entries.get(key).ok_or_else(|| {
Error::FailValidate(format!("entry key \"{:?}\" is not in schema", key))
})?;
let entry = Entry::new(
decompress_entry(entry, &entry_schema.compress)?,
key,
parent,
)?;
Ok(entry)
}
pub fn encode_query(&self, query: NewQuery) -> Result<Vec<u8>> {
let key = query.key();
let entry_schema = self.inner.entries.get(key).ok_or_else(|| {
Error::FailValidate(format!("entry key \"{:?}\" is not in schema", key))
})?;
if entry_schema
.entry
.query_check(&self.inner.types, query.validator())
{
query.complete(self.inner.max_regex)
} else {
Err(Error::FailValidate("Query is not allowed by schema".into()))
}
}
pub fn decode_query(&self, query: Vec<u8>) -> Result<Query> {
let query = Query::new(query, self.inner.max_regex)?;
let key = query.key();
let entry_schema = self.inner.entries.get(key).ok_or_else(|| {
Error::FailValidate(format!("entry key \"{:?}\" is not in schema", key))
})?;
if entry_schema
.entry
.query_check(&self.inner.types, query.validator())
{
Ok(query)
} else {
Err(Error::FailValidate("Query is not allowed by schema".into()))
}
}
}