1use std::{
8 collections::BTreeMap,
9 convert::{TryFrom, TryInto},
10};
11
12use crate::document::*;
13use crate::entry::*;
14pub use compress::*;
15use element::Parser;
16use query::{NewQuery, Query};
17
18use crate::error::{Error, Result};
19use crate::validator::{Checklist, DataChecklist, Validator};
20use crate::*;
21use serde::{Deserialize, Serialize};
22
23#[inline]
24fn compress_is_default(val: &Compress) -> bool {
25 if let Compress::General { algorithm, level } = val {
26 *algorithm == ALGORITHM_ZSTD && *level == 3
27 } else {
28 false
29 }
30}
31
32#[inline]
33fn int_is_zero(v: &Integer) -> bool {
34 v.as_u64().map(|v| v == 0).unwrap_or(false)
35}
36
/// True when `v` is zero. Serde skip-helper for the `max_regex` field.
#[inline]
fn u8_is_zero(v: &u8) -> bool {
    matches!(v, 0)
}
41
42#[derive(Clone, Debug, Serialize, Deserialize)]
43#[serde(deny_unknown_fields)]
44struct InnerSchema {
45 doc: Validator, #[serde(skip_serializing_if = "String::is_empty", default)]
47 description: String,
48 #[serde(skip_serializing_if = "compress_is_default", default)]
49 doc_compress: Compress,
50 #[serde(skip_serializing_if = "BTreeMap::is_empty", default)]
51 entries: BTreeMap<String, EntrySchema>,
52 #[serde(skip_serializing_if = "String::is_empty", default)]
53 name: String,
54 #[serde(skip_serializing_if = "BTreeMap::is_empty", default)]
55 types: BTreeMap<String, Validator>,
56 #[serde(skip_serializing_if = "int_is_zero", default)]
57 version: Integer,
58 #[serde(skip_serializing_if = "u8_is_zero", default)]
59 max_regex: u8,
60}
61
62#[derive(Clone, Debug, Serialize, Deserialize)]
63#[serde(deny_unknown_fields)]
64struct EntrySchema {
65 entry: Validator, #[serde(skip_serializing_if = "compress_is_default", default)]
67 compress: Compress,
68}
69
/// Validator/encoder for documents that do not adhere to any schema.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct NoSchema;
80
81impl NoSchema {
82 pub fn validate_new_doc(doc: NewDocument) -> Result<Document> {
86 if let Some(schema) = doc.schema_hash() {
88 return Err(Error::SchemaMismatch {
89 actual: Some(schema.to_owned()),
90 expected: None,
91 });
92 }
93
94 let types = BTreeMap::new();
96 let parser = Parser::new(doc.data());
97 let (parser, _) = Validator::Any.validate(&types, parser, None)?;
98 parser.finish()?;
99
100 Ok(Document::from_new(doc))
101 }
102
103 pub fn encode_doc(doc: Document) -> Result<(Hash, Vec<u8>)> {
106 if let Some(schema) = doc.schema_hash() {
108 return Err(Error::SchemaMismatch {
109 actual: Some(schema.to_owned()),
110 expected: None,
111 });
112 }
113
114 let (hash, doc, compression) = doc.complete();
116 let compression = match compression {
117 None => Compress::General {
118 algorithm: 0,
119 level: 3,
120 },
121 Some(None) => Compress::None,
122 Some(Some(level)) => Compress::General {
123 algorithm: 0,
124 level,
125 },
126 };
127 Ok((hash, compress_doc(doc, &compression)))
128 }
129
130 pub fn decode_doc(doc: Vec<u8>) -> Result<Document> {
132 let split = SplitDoc::split(&doc)?;
134 if !split.hash_raw.is_empty() {
135 return Err(Error::SchemaMismatch {
136 actual: split.hash_raw.try_into().ok(),
137 expected: None,
138 });
139 }
140
141 let doc = Document::new(decompress_doc(doc, &Compress::None)?)?;
143
144 let types = BTreeMap::new();
146 let parser = Parser::new(doc.data());
147 let (parser, _) = Validator::Any.validate(&types, parser, None)?;
148 parser.finish()?;
149
150 Ok(doc)
151 }
152
153 pub fn trusted_decode_doc(doc: Vec<u8>) -> Result<Document> {
157 let split = SplitDoc::split(&doc)?;
159 if !split.hash_raw.is_empty() {
160 return Err(Error::SchemaMismatch {
161 actual: split.hash_raw.try_into().ok(),
162 expected: None,
163 });
164 }
165
166 let doc = Document::new(decompress_doc(doc, &Compress::None)?)?;
168 Ok(doc)
169 }
170}
171
/// Compress the data section of an encoded document, leaving the header and
/// trailing signature intact. Returns the input unchanged when compression is
/// disabled or fails.
fn compress_doc(doc: Vec<u8>, compression: &Compress) -> Vec<u8> {
    if let Compress::None = compression {
        return doc;
    }

    // Assumes `doc` is a well-formed encoded document (callers pass output of
    // the encoder) — TODO(review): confirm all callers uphold this for unwrap.
    let split = SplitDoc::split(&doc).unwrap();
    let header_len = doc.len() - split.data.len() - split.signature_raw.len();
    // Reserve enough for the worst-case compressed size up front.
    let max_len = zstd_safe::compress_bound(split.data.len());
    let mut compress = Vec::with_capacity(doc.len() + max_len - split.data.len());
    // Copy the header verbatim; it gets patched below after compressing.
    compress.extend_from_slice(&doc[..header_len]);

    match compression.compress(compress, split.data) {
        Ok(mut compress) => {
            // Patch the header in place: byte 0 is the compression marker,
            // and the 3 bytes just before the data hold the new data length
            // in little-endian order.
            let data_len = (compress.len() - header_len).to_le_bytes();
            compress[0] = CompressType::type_of(compression).into();
            compress[header_len - 3] = data_len[0];
            compress[header_len - 2] = data_len[1];
            compress[header_len - 1] = data_len[2];
            compress.extend_from_slice(split.signature_raw);
            compress
        }
        // Compression declined (e.g. wouldn't shrink); keep the original.
        Err(()) => doc,
    }
}
199
/// Decompress the data section of an encoded document, rewriting the header to
/// mark it as uncompressed. Returns the input unchanged when the header's
/// marker says it is not compressed.
fn decompress_doc(compress: Vec<u8>, compression: &Compress) -> Result<Vec<u8>> {
    let split = SplitDoc::split(&compress)?;
    // The algorithm actually used is read from the header's marker byte, not
    // from the `compression` argument.
    let marker = CompressType::try_from(split.compress_raw)
        .map_err(|m| Error::BadHeader(format!("unrecognized compression marker 0x{:x}", m)))?;
    if let CompressType::None = marker {
        return Ok(compress);
    }
    let header_len = compress.len() - split.data.len() - split.signature_raw.len();

    // Copy the header verbatim, then append the decompressed data after it.
    let mut doc = Vec::new();
    doc.extend_from_slice(&compress[..header_len]);
    let mut doc = compression.decompress(
        doc,
        split.data,
        marker,
        split.signature_raw.len(),
        // Cap output size to guard against decompression bombs.
        MAX_DOC_SIZE,
    )?;
    // Patch the header: marker byte first, then the 3-byte little-endian data
    // length just before the data section.
    let data_len = (doc.len() - header_len).to_le_bytes();
    doc[0] = CompressType::None.into();
    doc[header_len - 3] = data_len[0];
    doc[header_len - 2] = data_len[1];
    doc[header_len - 1] = data_len[2];
    doc.extend_from_slice(split.signature_raw);
    Ok(doc)
}
228
/// Compress the data section of an encoded entry, leaving the fixed-size
/// prefix and trailing signature intact. Returns the input unchanged when
/// compression is disabled or fails.
fn compress_entry(entry: Vec<u8>, compression: &Compress) -> Vec<u8> {
    if let Compress::None = compression {
        return entry;
    }

    // Assumes `entry` is a well-formed encoded entry (callers pass encoder
    // output) — TODO(review): confirm all callers uphold this for unwrap.
    let split = SplitEntry::split(&entry).unwrap();
    // Reserve enough for the worst-case compressed size up front.
    let max_len = zstd_safe::compress_bound(split.data.len());
    let mut compress = Vec::with_capacity(entry.len() + max_len - split.data.len());
    compress.extend_from_slice(&entry[..ENTRY_PREFIX_LEN]);

    match compression.compress(compress, split.data) {
        Ok(mut compress) => {
            // Patch the fixed prefix: byte 0 is the compression marker; bytes
            // 1-2 hold the new data length (entries use a 2-byte length,
            // unlike documents' 3-byte length) in little-endian order.
            let data_len = (compress.len() - ENTRY_PREFIX_LEN).to_le_bytes();
            compress[0] = CompressType::type_of(compression).into();
            compress[1] = data_len[0];
            compress[2] = data_len[1];
            compress.extend_from_slice(split.signature_raw);
            compress
        }
        // Compression declined (e.g. wouldn't shrink); keep the original.
        Err(()) => entry,
    }
}
254
/// Decompress the data section of an encoded entry, rewriting the prefix to
/// mark it as uncompressed. Returns the input unchanged when the prefix's
/// marker says it is not compressed.
fn decompress_entry(compress: Vec<u8>, compression: &Compress) -> Result<Vec<u8>> {
    let split = SplitEntry::split(&compress)?;
    // The algorithm actually used is read from the prefix's marker byte, not
    // from the `compression` argument.
    let marker = CompressType::try_from(split.compress_raw)
        .map_err(|m| Error::BadHeader(format!("unrecognized compression marker 0x{:x}", m)))?;
    if let CompressType::None = marker {
        return Ok(compress);
    }

    // Copy the fixed prefix verbatim, then append the decompressed data.
    let mut entry = Vec::new();
    entry.extend_from_slice(&compress[..ENTRY_PREFIX_LEN]);
    let mut entry = compression.decompress(
        entry,
        split.data,
        marker,
        split.signature_raw.len(),
        // Cap output size to guard against decompression bombs.
        MAX_ENTRY_SIZE,
    )?;
    // Patch the prefix: marker byte, then the 2-byte little-endian data length.
    let data_len = (entry.len() - ENTRY_PREFIX_LEN).to_le_bytes();
    entry[0] = CompressType::None.into();
    entry[1] = data_len[0];
    entry[2] = data_len[1];
    entry.extend_from_slice(split.signature_raw);
    Ok(entry)
}
281
/// Builder for a schema document, which can then be encoded or turned into a
/// [`Schema`] via [`Schema::from_doc`].
#[derive(Clone, Debug)]
pub struct SchemaBuilder {
    // Accumulates the schema contents; serialized by `build()`.
    inner: InnerSchema,
}
291
292impl SchemaBuilder {
293 pub fn new(doc: Validator) -> Self {
296 Self {
297 inner: InnerSchema {
298 doc,
299 description: String::default(),
300 doc_compress: Compress::default(),
301 entries: BTreeMap::new(),
302 name: String::default(),
303 types: BTreeMap::new(),
304 version: Integer::default(),
305 max_regex: 0,
306 },
307 }
308 }
309
310 pub fn description(mut self, description: &str) -> Self {
312 self.inner.description = description.to_owned();
313 self
314 }
315
316 pub fn doc_compress(mut self, doc_compress: Compress) -> Self {
318 self.inner.doc_compress = doc_compress;
319 self
320 }
321
322 pub fn entry_add(
326 mut self,
327 entry: &str,
328 validator: Validator,
329 compress: Option<Compress>,
330 ) -> Self {
331 let compress = compress.unwrap_or_default();
332 self.inner.entries.insert(
333 entry.to_owned(),
334 EntrySchema {
335 entry: validator,
336 compress,
337 },
338 );
339 self
340 }
341
342 pub fn name(mut self, name: &str) -> Self {
344 self.inner.name = name.to_owned();
345 self
346 }
347
348 pub fn type_add(mut self, type_ref: &str, validator: Validator) -> Self {
350 self.inner.types.insert(type_ref.to_owned(), validator);
351 self
352 }
353
354 pub fn type_get(&self, type_ref: &str) -> Option<&Validator> {
356 self.inner.types.get(type_ref)
357 }
358
359 pub fn version<T: Into<Integer>>(mut self, version: T) -> Self {
361 self.inner.version = version.into();
362 self
363 }
364
365 pub fn regexes(mut self, max_regex: u8) -> Self {
367 self.inner.max_regex = max_regex;
368 self
369 }
370
371 pub fn build(self) -> Result<Document> {
373 let doc = NewDocument::new(None, self.inner)?;
374 NoSchema::validate_new_doc(doc)
375 }
376}
377
/// A schema, used to validate, encode, and decode documents and entries that
/// adhere to it, and to check queries against it.
#[derive(Clone, Debug)]
pub struct Schema {
    // Hash of the schema document this was built from; documents/entries must
    // reference this hash to be accepted.
    hash: Hash,
    // The deserialized schema contents.
    inner: InnerSchema,
}
392
impl Schema {
    /// Build a `Schema` from a schema [`Document`], with no limit on how many
    /// regexes the schema may contain.
    pub fn from_doc(doc: &Document) -> Result<Self> {
        let inner = doc.deserialize()?;
        let hash = doc.hash().clone();
        Ok(Self { hash, inner })
    }

    /// Build a `Schema` from a schema [`Document`], failing if the schema
    /// contains more than `max_regex` regexes in total — a guard against
    /// resource exhaustion from untrusted schemas.
    pub fn from_doc_max_regex(doc: &Document, max_regex: u8) -> Result<Self> {
        // First pass: deserialize into a raw value and count regexes in the
        // `doc` validator, every named type, and every entry validator.
        // NOTE(review): source had mojibake here ("®ex_check"); restored to
        // `&regex_check` — confirm against the original file.
        let regex_check: ValueRef = doc.deserialize()?;
        let mut regexes = crate::count_regexes(&regex_check["doc"]);
        if let Some(map) = regex_check["types"].as_map() {
            regexes += map
                .values()
                .fold(0, |acc, val| acc + crate::count_regexes(val));
        }
        if let Some(map) = regex_check["entries"].as_map() {
            regexes += map
                .values()
                .fold(0, |acc, val| acc + crate::count_regexes(&val["entry"]));
        }

        if regexes > (max_regex as usize) {
            return Err(Error::FailValidate(format!(
                "Found {} regexes in Schema, only {} allowed",
                regexes, max_regex
            )));
        }

        // Second pass: the real deserialization into the schema structure.
        let inner = doc.deserialize()?;
        let hash = doc.hash().clone();
        Ok(Self { hash, inner })
    }

    /// The hash identifying this schema.
    pub fn hash(&self) -> &Hash {
        &self.hash
    }

    /// Validate a [`NewDocument`] against this schema, turning it into a
    /// [`Document`]. Fails with `SchemaMismatch` if the document names a
    /// different schema (or none at all).
    pub fn validate_new_doc(&self, doc: NewDocument) -> Result<Document> {
        match doc.schema_hash() {
            Some(hash) if hash == &self.hash => (),
            actual => {
                return Err(Error::SchemaMismatch {
                    actual: actual.cloned(),
                    expected: Some(self.hash.clone()),
                })
            }
        }

        // Run the schema's document validator over the raw data.
        let parser = Parser::new(doc.data());
        let (parser, _) = self.inner.doc.validate(&self.inner.types, parser, None)?;
        parser.finish()?;

        Ok(Document::from_new(doc))
    }

    /// Encode a [`Document`] adhering to this schema, returning its hash and
    /// encoded bytes. The document's own compression override wins: `None`
    /// uses the schema's `doc_compress`, `Some(None)` disables compression,
    /// and `Some(Some(level))` forces zstd at `level`.
    pub fn encode_doc(&self, doc: Document) -> Result<(Hash, Vec<u8>)> {
        match doc.schema_hash() {
            Some(hash) if hash == &self.hash => (),
            actual => {
                return Err(Error::SchemaMismatch {
                    actual: actual.cloned(),
                    expected: Some(self.hash.clone()),
                })
            }
        }

        let (hash, doc, compression) = doc.complete();
        let doc = match compression {
            None => compress_doc(doc, &self.inner.doc_compress),
            Some(None) => doc,
            Some(Some(level)) => compress_doc(
                doc,
                &Compress::General {
                    algorithm: 0,
                    level,
                },
            ),
        };

        Ok((hash, doc))
    }

    /// Verify that an encoded document's header names exactly this schema.
    fn check_schema(&self, doc: &[u8]) -> Result<()> {
        let split = SplitDoc::split(doc)?;
        // An empty hash region means "no schema" — still a mismatch here.
        if split.hash_raw.is_empty() {
            return Err(Error::SchemaMismatch {
                actual: None,
                expected: Some(self.hash.clone()),
            });
        }
        let schema = Hash::try_from(split.hash_raw)
            .map_err(|_| Error::BadHeader("Unable to decode schema hash".into()))?;
        if schema != self.hash {
            Err(Error::SchemaMismatch {
                actual: Some(schema),
                expected: Some(self.hash.clone()),
            })
        } else {
            Ok(())
        }
    }

    /// Decode and validate a document adhering to this schema from its
    /// encoded bytes.
    pub fn decode_doc(&self, doc: Vec<u8>) -> Result<Document> {
        self.check_schema(&doc)?;

        let doc = Document::new(decompress_doc(doc, &self.inner.doc_compress)?)?;

        // Full validation against the schema's document validator.
        let parser = Parser::new(doc.data());
        let (parser, _) = self.inner.doc.validate(&self.inner.types, parser, None)?;
        parser.finish()?;

        Ok(doc)
    }

    /// Decode a document adhering to this schema *without* validating its
    /// contents. Intended for data from a trusted, already-validated source.
    pub fn trusted_decode_doc(&self, doc: Vec<u8>) -> Result<Document> {
        self.check_schema(&doc)?;

        let doc = Document::new(decompress_doc(doc, &Compress::None)?)?;
        Ok(doc)
    }

    /// Validate a [`NewEntry`] against this schema, producing an [`Entry`]
    /// wrapped in a [`DataChecklist`] of documents that must still be checked
    /// to complete validation.
    pub fn validate_new_entry(&self, entry: NewEntry) -> Result<DataChecklist<Entry>> {
        if entry.schema_hash() != &self.hash {
            return Err(Error::SchemaMismatch {
                actual: Some(entry.schema_hash().clone()),
                expected: Some(self.hash.clone()),
            });
        }

        let parser = Parser::new(entry.data());
        // The entry key must be one the schema declares.
        // NOTE(review): `{:?}` on a &str adds its own quotes, so this message
        // renders with doubled quotes — confirm intended.
        let entry_schema = self.inner.entries.get(entry.key()).ok_or_else(|| {
            Error::FailValidate(format!("entry key \"{:?}\" is not in schema", entry.key()))
        })?;
        let checklist = Some(Checklist::new(&self.hash, &self.inner.types));
        let (parser, checklist) =
            entry_schema
                .entry
                .validate(&self.inner.types, parser, checklist)?;
        parser.finish()?;

        // A checklist was passed in above, so one is expected back.
        Ok(DataChecklist::from_checklist(
            checklist.unwrap(),
            Entry::from_new(entry),
        ))
    }

    /// Validate and encode an [`Entry`], returning its reference, encoded
    /// bytes, and the hashes of documents needed to finish validation.
    pub fn encode_entry(&self, entry: Entry) -> Result<(EntryRef, Vec<u8>, Vec<Hash>)> {
        if entry.schema_hash() != &self.hash {
            return Err(Error::SchemaMismatch {
                actual: Some(entry.schema_hash().clone()),
                expected: Some(self.hash.clone()),
            });
        }

        let entry_schema = self.inner.entries.get(entry.key()).ok_or_else(|| {
            Error::FailValidate(format!("entry key \"{:?}\" is not in schema", entry.key()))
        })?;
        let parser = Parser::new(entry.data());
        let checklist = Some(Checklist::new(&self.hash, &self.inner.types));
        let (parser, checklist) =
            entry_schema
                .entry
                .validate(&self.inner.types, parser, checklist)?;
        parser.finish()?;
        // A checklist was passed in above, so one is expected back; collect
        // the document hashes it still needs.
        let needed_docs: Vec<Hash> = checklist.unwrap().iter().map(|(hash, _)| hash).collect();

        // Entry compression override wins over the schema's per-key setting.
        let (entry_ref, entry, compression) = entry.complete();
        let entry = match compression {
            None => compress_entry(entry, &entry_schema.compress),
            Some(None) => entry,
            Some(Some(level)) => compress_entry(
                entry,
                &Compress::General {
                    algorithm: 0,
                    level,
                },
            ),
        };

        Ok((entry_ref, entry, needed_docs))
    }

    /// Decode and validate an entry (stored under `key` in `parent`) from its
    /// encoded bytes, producing a [`DataChecklist`] that must be completed to
    /// finish validation.
    pub fn decode_entry(
        &self,
        entry: Vec<u8>,
        key: &str,
        parent: &Document,
    ) -> Result<DataChecklist<Entry>> {
        // The parent document must adhere to this schema.
        match parent.schema_hash() {
            Some(hash) if hash == &self.hash => (),
            actual => {
                return Err(Error::SchemaMismatch {
                    actual: actual.cloned(),
                    expected: Some(self.hash.clone()),
                })
            }
        }

        let entry_schema = self.inner.entries.get(key).ok_or_else(|| {
            Error::FailValidate(format!("entry key \"{:?}\" is not in schema", key))
        })?;

        let entry = Entry::new(
            decompress_entry(entry, &entry_schema.compress)?,
            key,
            parent,
        )?;

        let parser = Parser::new(entry.data());
        let checklist = Some(Checklist::new(&self.hash, &self.inner.types));
        let (parser, checklist) =
            entry_schema
                .entry
                .validate(&self.inner.types, parser, checklist)?;
        parser.finish()?;

        // A checklist was passed in above, so one is expected back.
        Ok(DataChecklist::from_checklist(checklist.unwrap(), entry))
    }

    /// Decode an entry *without* validating its contents, trusting the caller
    /// to supply the entry's correct hash. Intended for data from a trusted,
    /// already-validated source.
    pub fn trusted_decode_entry(
        &self,
        entry: Vec<u8>,
        key: &str,
        parent: &Document,
        entry_hash: &Hash,
    ) -> Result<Entry> {
        // The parent document must adhere to this schema.
        match parent.schema_hash() {
            Some(hash) if hash == &self.hash => (),
            actual => {
                return Err(Error::SchemaMismatch {
                    actual: actual.cloned(),
                    expected: Some(self.hash.clone()),
                })
            }
        }
        let entry_schema = self.inner.entries.get(key).ok_or_else(|| {
            Error::FailValidate(format!("entry key \"{:?}\" is not in schema", key))
        })?;

        let entry = Entry::trusted_new(
            decompress_entry(entry, &entry_schema.compress)?,
            key,
            parent,
            entry_hash,
        )?;
        Ok(entry)
    }

    /// Encode a [`NewQuery`], failing if the target entry key doesn't exist
    /// or the schema's entry validator would never admit this query.
    pub fn encode_query(&self, query: NewQuery) -> Result<Vec<u8>> {
        let key = query.key();
        let entry_schema = self.inner.entries.get(key).ok_or_else(|| {
            Error::FailValidate(format!("entry key \"{:?}\" is not in schema", key))
        })?;
        if entry_schema
            .entry
            .query_check(&self.inner.types, query.validator())
        {
            // `max_regex` caps the number of regexes the query may carry.
            query.complete(self.inner.max_regex)
        } else {
            Err(Error::FailValidate("Query is not allowed by schema".into()))
        }
    }

    /// Decode an encoded query, failing if it exceeds the schema's regex
    /// limit, targets an unknown entry key, or is not permitted by the
    /// schema's entry validator.
    pub fn decode_query(&self, query: Vec<u8>) -> Result<Query> {
        let query = Query::new(query, self.inner.max_regex)?;
        let key = query.key();
        let entry_schema = self.inner.entries.get(key).ok_or_else(|| {
            Error::FailValidate(format!("entry key \"{:?}\" is not in schema", key))
        })?;
        if entry_schema
            .entry
            .query_check(&self.inner.types, query.validator())
        {
            Ok(query)
        } else {
            Err(Error::FailValidate("Query is not allowed by schema".into()))
        }
    }
}