binary_data_schema/
lib.rs

1//! Binary Data Schema (BDS) is an extension of [JSON schema].
//! With this extension it is possible to convert JSON documents into raw bytes and
//! back.
4//!
5//! The intention is to use BDS in [WoT Thing Descriptions] in order to allow `application/octet-stream` as a [content type for forms].
6//!
7//! # Capabilities
8//!
//! The following is a list of all types that can be encoded.
10//! The list may not be exhaustive.
11//! Further explanation can be found in the description of the respective modules.
12//!
13//! - Boolean values (`true`, `false`)
14//! - Integer values:
15//!   - Length 1 to 8 bytes
16//!   - Big and Little Endianness
17//!   - Signed or unsigned
18//! - Number values:
19//!   - Single and double precision according to [IEEE 754]
20//!   - Via linear transformation (`value / scale - offset`) encoding as integer
21//! - Boolean, integer and numeric values can be encoded as bitfields (cover only a certain number of bits instead of whole bytes)
22//! - Object and array schemata allow for encoding complex data structures
23//! - UTF-8 strings
//! - Hex-encoded strings (regex: `^([0-9a-f]{2})*$`)
25//! - Variable sized values, i.e. strings and arrays, have different ways to [define their length](crate::LengthEncoding):
26//!   - Fixed size
27//!   - Explicit length → Length of the value is encoded at the beginning of the value
28//!   - End pattern → The end of the value is marked by a sentinel value, like in C with `\0`
29//!   - Capacity → A fixed space is reserved. Unused space is filled with padding
30//!   - Till end → The value continues until the end of the message
31//!
32//! # Features
33//!
//! The specific features of each schema are explained in the respective submodule:
35//!
36//! - [boolean schema](boolean)
37//! - [integer schema](integer)
38//! - [number schema](number)
39//! - [string schema](string)
40//! - [array schema](array)
41//! - [object schema](object)
42//!
43//! Each feature is explained with an example. The examples follow the same structure as the (commented) [`default` example](#example) below.
44//!
//! BDS is far from feature complete. If you do not find a feature described, it is probably safe to assume that it is not yet implemented.
//! If you require a specific feature, please [file an issue].
47//! PRs are also welcome.
48//!
49//! ## `default`
50//!
51//! The only feature described on this level is `default`.
52//!
//! Binary protocols often have some kind of _magic_ start and end bytes.
//! To model those, BDS uses the [`default` keyword].
//! When encoding a JSON document, fields whose schema has a `default` value do not have to be provided.
56//!
57//! - Fields with `default` are not required for encoding but included when
58//!   decoded.
59//! - To keep BDS aligned with [JSON schema] it is recommended to add
60//!   [`"required"`] to object schemata.
61//!
62//! ### Example
63//!
64//! ```
65//! # use binary_data_schema::*;
66//! # use valico::json_schema;
67//! # use serde_json::{json, from_value};
68//! let schema = json!({
69//!     "type": "object",
70//!     "properties": {
71//!         "start": {
72//!             "type": "string",
73//!             "format": "binary",
74//!             "minLength": 2,
75//!             "maxLength": 2,
76//!             "default": "fe",
77//!             "position": 1
78//!         },
79//!         "is_on": {
80//!             "type": "boolean",
81//!             "position": 5
82//!         },
83//!         "end": {
84//!             "type": "string",
85//!             "format": "binary",
86//!             "minLength": 2,
87//!             "maxLength": 2,
88//!             "default": "ef",
89//!             "position": 10
90//!         }
91//!     },
92//!     "required": ["is_on"]
93//! });
94//! let mut scope = json_schema::Scope::new();
95//! // Valid JSON schema
96//! let j_schema = scope.compile_and_return(schema.clone(), false)?;
97//! // Valid Binary Data schema
98//! let schema = from_value::<DataSchema>(schema)?;
99//!
100//! let value = json!({ "is_on": true });
101//! // 'value' is valid for the JSON schema
102//! assert!(j_schema.validate(&value).is_valid());
103//! let mut encoded = Vec::new();
104//! // 'value' is valid for the Binary Data schema
105//! schema.encode(&mut encoded, &value)?;
106//! # let expected = [0xfe, 1, 0xef];
107//! # assert_eq!(&expected, encoded.as_slice());
108//!
109//! let mut encoded = std::io::Cursor::new(encoded);
110//! let back = schema.decode(&mut encoded)?;
111//! let expected = json!({
112//!     "start": "fe",
113//!     "is_on": true,
114//!     "end": "ef"
115//! });
116//! // The retrieved value is valid for the JSON schema
117//! assert!(j_schema.validate(&back).is_valid());
118//! // The retrieved value is as expected
119//! assert_eq!(back, expected);
120//! # Ok::<(), anyhow::Error>(())
121//! ```
122//!
123//! [JSON schema]: https://json-schema.org/
124//! [WoT Thing Descriptions]: https://www.w3.org/TR/wot-thing-description
125//! [content type for forms]: https://www.w3.org/TR/2020/NOTE-wot-binding-templates-20200130/#content-types
126//! [file an issue]: https://github.com/wintechis/binary-data-schema/issues
127//! [IEEE 754]: https://ieeexplore.ieee.org/document/8766229
128//! [`default` keyword]: http://json-schema.org/understanding-json-schema/reference/generic.html#annotations
129//! [`"required"`]: http://json-schema.org/understanding-json-schema/reference/object.html#required-properties
130
131#![warn(missing_debug_implementations)]
132
133pub mod array;
134pub mod boolean;
135pub mod integer;
136pub mod number;
137pub mod object;
138pub mod string;
139pub(crate) mod util;
140pub use self::util::LengthEncoding;
141
142use std::{convert::TryFrom, io, string::FromUtf8Error};
143
144use byteorder::{ReadBytesExt, WriteBytesExt};
145use integer::Bitfield;
146use serde::{de::Error as DeError, Deserialize, Deserializer};
147use serde_json::Value;
148
149use crate::{
150    array::ArraySchema, boolean::BooleanSchema, integer::IntegerSchema, number::NumberSchema,
151    object::ObjectSchema, string::StringSchema,
152};
153
154pub type Result<T, E = Error> = std::result::Result<T, E>;
155
156/// A schema to serialize a value to bytes.
157pub trait Encoder {
158    /// Error encoding a value.
159    type Error;
160
161    /// Write a Json value according to the schema.
162    fn encode<W>(&self, target: &mut W, value: &Value) -> Result<usize, Self::Error>
163    where
164        W: io::Write + WriteBytesExt;
165}
166
167/// A schema to de-serialize a value from bytes.
168pub trait Decoder {
169    /// Error decoding a value.
170    type Error;
171
172    /// Decode a value from a target with the given schema.
173    fn decode<R>(&self, target: &mut R) -> Result<Value, Self::Error>
174    where
175        R: io::Read + ReadBytesExt;
176}
177
178/// Errors from binary serialization.
179#[derive(Debug, thiserror::Error)]
180pub enum Error {
181    #[error("Invalid string schema: {0}")]
182    ValidateString(#[from] string::ValidationError),
183    #[error("Encoding with string schema failed: {0}")]
184    EncodeString(#[from] string::EncodingError),
185    #[error("Decoding with string schema failed: {0}")]
186    DecodeString(#[from] string::DecodingError),
187    #[error("Invalid boolean schema: {0}")]
188    ValidateBoolean(#[from] boolean::ValidationError),
189    #[error("Encoding with boolean schema failed: {0}")]
190    EncodeBoolean(#[from] boolean::EncodingError),
191    #[error("Decoding with boolean schema failed: {0}")]
192    DecodeBoolean(#[from] boolean::DecodingError),
193    #[error("Invalid number schema: {0}")]
194    ValidateNumber(#[from] number::ValidationError),
195    #[error("Encoding with number schema failed: {0}")]
196    EncodeNumber(#[from] number::EncodingError),
197    #[error("Decoding with number schema failed: {0}")]
198    DecodeNumber(#[from] number::DecodingError),
199    #[error("Invalid integer schema: {0}")]
200    ValidateInteger(#[from] integer::ValidationError),
201    #[error("Encoding with integer schema failed: {0}")]
202    EncodeInteger(#[from] integer::EncodingError),
203    #[error("Decoding with integer schema failed: {0}")]
204    DecodeInteger(#[from] integer::DecodingError),
205    #[error("Invalid object schema: {0}")]
206    ValidateObject(#[from] object::ValidationError),
207    #[error("Encoding with object schema failed: {0}")]
208    EncodeObject(#[from] object::EncodingError),
209    #[error("Decoding with object schema failed: {0}")]
210    DecodeObject(#[from] object::DecodingError),
211    #[error("Invalid array schema: {0}")]
212    ValidateArray(#[from] array::ValidationError),
213    #[error("Encoding with array schema failed: {0}")]
214    EncodeArray(#[from] array::EncodingError),
215    #[error("Decoding with array schema failed: {0}")]
216    DecodeArray(#[from] array::DecodingError),
217
218    #[error("Invalid JSON: {0}")]
219    Serialization(#[from] serde_json::Error),
220    #[error("IO Error: {0}")]
221    IoFail(#[from] io::Error),
222    #[error("The encoded string is invalid: {0}")]
223    InvalidString(#[from] FromUtf8Error),
224    #[error("The encoded string is invalid: {0}")]
225    InvalidBString(#[from] bstr::FromUtf8Error),
226    #[error("Binary format string is invalid: {0}")]
227    BinaryEncoding(#[from] hex::FromHexError),
228    #[error("Can't encode '{value}' as string: {source}")]
229    StringEncoding {
230        value: String,
231        source: crate::string::EncodingError,
232    },
233
234    #[error("The value '{value}' can not be encoded with a {type_} schema")]
235    InvalidValue { value: String, type_: &'static str },
236
237    #[error("The default character has to be UTF8 encoded as one byte but '{0}' is encoded in {} bytes", .0.len_utf8())]
238    InvalidDefaultChar(char),
239    #[error("'{0}' is not a field in the schema")]
240    NotAField(String),
241
242    #[error("A Json object was expected but got: {0}")]
243    NotAnObject(String),
244    #[error("The length of an array must be encoded in some way")]
245    MissingArrayLength,
246    #[error(
247        "Can not encode array {value} as its length is {len} but only length {fixed} is supported"
248    )]
249    NotMatchFixedLength {
250        value: String,
251        len: usize,
252        fixed: usize,
253    },
254    #[error("Can not encode array {value} as its length is {len} but only length up to {max} can be encoded")]
255    ExceededLengthEncoding {
256        value: String,
257        len: usize,
258        max: usize,
259    },
260    #[error("There are contrary specifications for a fixed-length array")]
261    InconsitentFixedLength,
262
263    #[error("Can not decode value as the encoded lenght is {len} but capcacity is only {cap}")]
264    EncodedValueExceedsCapacity { len: usize, cap: usize },
265
266    #[error("The value '{value}' is invalid as 'default' for a {type_} schema: {source}")]
267    InvalidDefault {
268        value: String,
269        type_: &'static str,
270        #[source]
271        source: Box<Error>,
272    },
273    #[error("Expected the constant value {expected} but got {got}")]
274    InvalidConstValue { expected: String, got: String },
275}
276
277impl Error {
278    pub fn due_to_eof(&self) -> bool {
279        match &self {
280            Error::DecodeString(e) => e.due_to_eof(),
281            Error::DecodeBoolean(e) => e.due_to_eof(),
282            Error::DecodeNumber(e) => e.due_to_eof(),
283            Error::DecodeInteger(e) => e.due_to_eof(),
284            Error::DecodeObject(e) => e.due_to_eof(),
285            Error::DecodeArray(e) => e.due_to_eof(),
286            _ => false,
287        }
288    }
289}
290
291/// Order of bytes within a field.
292#[derive(Debug, Copy, Clone, Deserialize, Eq, PartialEq)]
293#[serde(rename_all = "lowercase")]
294pub enum ByteOrder {
295    /// LSB first.
296    LittleEndian,
297    /// MSB first.
298    BigEndian,
299}
300
301/// Raw data schema to catch constant values.
302#[derive(Debug, Clone, Deserialize)]
303struct RawDataSchema {
304    #[serde(flatten)]
305    inner: InnerSchema,
306    #[serde(rename = "default")]
307    default_: Option<Value>,
308}
309
310/// The inner data schema without special features like `"default"`.
311#[derive(Debug, Clone, Deserialize)]
312#[serde(tag = "type", rename_all = "lowercase")]
313pub enum InnerSchema {
314    Boolean(BooleanSchema),
315    Integer(IntegerSchema),
316    Number(NumberSchema),
317    String(Box<StringSchema>),
318    Array(Box<ArraySchema>),
319    Object(ObjectSchema),
320}
321
322/// The data schema is the typical type users will interact with.
323#[derive(Debug, Clone)]
324pub struct DataSchema {
325    inner: InnerSchema,
326    default_: Option<Value>,
327}
328
329impl Default for ByteOrder {
330    fn default() -> Self {
331        ByteOrder::BigEndian
332    }
333}
334
335impl InnerSchema {
336    fn type_(&self) -> &'static str {
337        match self {
338            InnerSchema::Boolean(_) => "boolean",
339            InnerSchema::Integer(_) => "integer",
340            InnerSchema::Number(_) => "number",
341            InnerSchema::String(_) => "string",
342            InnerSchema::Array(_) => "array",
343            InnerSchema::Object(_) => "object",
344        }
345    }
346    fn is_bitfield(&self) -> bool {
347        self.bitfield().is_some()
348    }
349    /// Return the inner bitfield if there is some.
350    fn bitfield(&self) -> Option<&Bitfield> {
351        match &self {
352            Self::Number(NumberSchema::Integer {
353                integer: IntegerSchema::Bitfield(bf),
354                ..
355            })
356            | Self::Integer(IntegerSchema::Bitfield(bf))
357            | Self::Boolean(BooleanSchema { bf }) => Some(bf),
358            _ => None,
359        }
360    }
361}
362
363impl Encoder for InnerSchema {
364    type Error = Error;
365
366    fn encode<W>(&self, target: &mut W, value: &Value) -> Result<usize, Self::Error>
367    where
368        W: io::Write + WriteBytesExt,
369    {
370        let written = match self {
371            InnerSchema::Boolean(schema) => schema.encode(target, value)?,
372            InnerSchema::Integer(schema) => schema.encode(target, value)?,
373            InnerSchema::Number(schema) => schema.encode(target, value)?,
374            InnerSchema::String(schema) => schema.encode(target, value)?,
375            InnerSchema::Array(schema) => schema.encode(target, value)?,
376            InnerSchema::Object(schema) => schema.encode(target, value)?,
377        };
378        Ok(written)
379    }
380}
381
382impl Decoder for InnerSchema {
383    type Error = Error;
384
385    fn decode<R>(&self, target: &mut R) -> Result<Value, Self::Error>
386    where
387        R: io::Read + ReadBytesExt,
388    {
389        let value = match self {
390            InnerSchema::Boolean(schema) => schema.decode(target)?,
391            InnerSchema::Integer(schema) => schema.decode(target)?,
392            InnerSchema::Number(schema) => schema.decode(target)?,
393            InnerSchema::String(schema) => schema.decode(target)?,
394            InnerSchema::Array(schema) => schema.decode(target)?,
395            InnerSchema::Object(schema) => schema.decode(target)?,
396        };
397        Ok(value)
398    }
399}
400
401impl DataSchema {
402    /// The `"type"` tags value.
403    pub fn type_(&self) -> &'static str {
404        self.inner.type_()
405    }
406    /// Check whether the data schema encodes to/from a bitfield.
407    pub fn is_bitfield(&self) -> bool {
408        self.inner.is_bitfield()
409    }
410}
411
412impl TryFrom<RawDataSchema> for DataSchema {
413    type Error = Error;
414
415    fn try_from(raw: RawDataSchema) -> Result<Self, Self::Error> {
416        if let Some(value) = &raw.default_ {
417            let mut dummy = Vec::new();
418            if let Err(e) = raw.inner.encode(&mut dummy, value) {
419                return Err(Error::InvalidDefault {
420                    value: value.to_string(),
421                    type_: raw.inner.type_(),
422                    source: Box::new(e),
423                });
424            }
425        }
426
427        Ok(Self {
428            inner: raw.inner,
429            default_: raw.default_,
430        })
431    }
432}
433
434impl<'de> Deserialize<'de> for DataSchema {
435    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
436    where
437        D: Deserializer<'de>,
438    {
439        let raw = RawDataSchema::deserialize(deserializer)?;
440        DataSchema::try_from(raw).map_err(D::Error::custom)
441    }
442}
443
444impl From<ObjectSchema> for InnerSchema {
445    fn from(v: ObjectSchema) -> Self {
446        Self::Object(v)
447    }
448}
449
450impl From<ArraySchema> for InnerSchema {
451    fn from(v: ArraySchema) -> Self {
452        Self::Array(Box::new(v))
453    }
454}
455
456impl From<Box<ArraySchema>> for InnerSchema {
457    fn from(v: Box<ArraySchema>) -> Self {
458        Self::Array(v)
459    }
460}
461
462impl From<BooleanSchema> for InnerSchema {
463    fn from(v: BooleanSchema) -> Self {
464        Self::Boolean(v)
465    }
466}
467
468impl From<IntegerSchema> for InnerSchema {
469    fn from(v: IntegerSchema) -> Self {
470        Self::Integer(v)
471    }
472}
473
474impl From<NumberSchema> for InnerSchema {
475    fn from(v: NumberSchema) -> Self {
476        Self::Number(v)
477    }
478}
479
480impl From<StringSchema> for InnerSchema {
481    fn from(v: StringSchema) -> Self {
482        Self::String(Box::new(v))
483    }
484}
485
486impl<S> From<S> for DataSchema
487where
488    S: Into<InnerSchema>,
489{
490    fn from(schema: S) -> Self {
491        let inner = schema.into();
492        Self {
493            inner,
494            default_: None,
495        }
496    }
497}
498
499impl Encoder for DataSchema {
500    type Error = Error;
501
502    fn encode<W>(&self, target: &mut W, value: &Value) -> Result<usize, Self::Error>
503    where
504        W: io::Write + WriteBytesExt,
505    {
506        if let Some(c) = &self.default_ {
507            self.inner.encode(target, c)
508        } else {
509            self.inner.encode(target, value)
510        }
511    }
512}
513
514impl Decoder for DataSchema {
515    type Error = Error;
516
517    fn decode<R>(&self, target: &mut R) -> Result<Value, Self::Error>
518    where
519        R: io::Read + ReadBytesExt,
520    {
521        self.inner.decode(target)
522    }
523}
524
#[cfg(test)]
mod test {
    use super::*;
    use anyhow::Result;
    use serde_json::{from_value, json};

    /// Integer fields with a `default` do not have to be provided for
    /// encoding and show up again after decoding.
    #[test]
    fn const_schema() -> Result<()> {
        let schema: DataSchema = from_value(json!({
            "type": "object",
            "properties": {
                "start": {
                    "type": "integer",
                    "length": 1,
                    "default": 100,
                    "position": 1
                },
                "is_on": {
                    "type": "boolean",
                    "position": 5
                },
                "end": {
                    "type": "integer",
                    "length": 1,
                    "signed": false,
                    "default": 200,
                    "position": 10,
                }
            }
        }))?;
        println!("schema: {:#?}", schema);

        let input = json!({ "is_on": true });
        let wire = [100, 1, 200];
        let mut buffer = Vec::new();

        let written = schema.encode(&mut buffer, &input)?;
        assert_eq!(3, written);
        assert_eq!(&wire, buffer.as_slice());

        let mut cursor = std::io::Cursor::new(buffer);
        let decoded = schema.decode(&mut cursor)?;
        let complete = json!({
            "start": 100,
            "is_on": true,
            "end": 200
        });
        assert_eq!(decoded, complete);

        Ok(())
    }

    /// An RGB command framed by constant binary start and end sequences.
    #[test]
    fn led_rgb() -> Result<()> {
        let schema: DataSchema = from_value(json!({
            "type": "object",
            "properties": {
                "start": {
                    "type": "string",
                    "format": "binary",
                    "minLength": 8,
                    "maxLength": 8,
                    "default": "7e000503",
                    "position": 1
                },
                "red": {
                    "type": "integer",
                    "signed": false,
                    "length": 1,
                    "position": 3,
                    "description": "Red value of the color [0 - 255]."
                },
                "green": {
                    "type": "integer",
                    "signed": false,
                    "length": 1,
                    "position": 4,
                    "description": "Green value of the color [0 - 255]."
                },
                "blue": {
                    "type": "integer",
                    "signed": false,
                    "length": 1,
                    "position": 5,
                    "description": "Blue value of the color [0 - 255]."
                },
                "end": {
                    "type": "string",
                    "format": "binary",
                    "minLength": 4,
                    "maxLength": 4,
                    "default": "00ef",
                    "position": 10
                }
            }
        }))?;
        let color = json!({ "red": 255, "green": 16, "blue": 255 });
        let wire = [0x7e, 0, 0x05, 0x03, 0xff, 0x10, 0xff, 0, 0xef];

        let mut buffer = Vec::new();
        let written = schema.encode(&mut buffer, &color)?;
        assert_eq!(9, written);
        assert_eq!(&wire, buffer.as_slice());

        let mut input = std::io::Cursor::new(wire);
        let decoded = schema.decode(&mut input)?;
        assert_eq!(decoded["red"], 255);
        assert_eq!(decoded["green"], 16);
        assert_eq!(decoded["blue"], 255);

        Ok(())
    }

    /// A power command framed by constant binary start and end sequences.
    #[test]
    fn led_power() -> Result<()> {
        let schema: DataSchema = from_value(json!({
            "type": "object",
            "properties": {
                "start": {
                    "type": "string",
                    "format": "binary",
                    "minLength": 6,
                    "maxLength": 6,
                    "default": "7e0004",
                    "position": 1
                },
                "is_on": {
                    "type": "boolean",
                    "position": 5
                },
                "end": {
                    "type": "string",
                    "format": "binary",
                    "minLength": 10,
                    "maxLength": 10,
                    "default": "00000000ef",
                    "position": 10
                }
            }
        }))?;
        let power = json!({ "is_on": true });
        let wire = [0x7e, 0, 0x04, 1, 0, 0, 0, 0, 0xef];

        let mut buffer = Vec::new();
        let written = schema.encode(&mut buffer, &power)?;
        assert_eq!(9, written);
        assert_eq!(&wire, buffer.as_slice());

        Ok(())
    }

    /// Mirror of the example in the crate-level documentation.
    #[test]
    fn doc() -> Result<()> {
        use valico::json_schema;

        let schema = json!({
            "type": "object",
            "properties": {
                "start": {
                    "type": "string",
                    "format": "binary",
                    "minLength": 2,
                    "maxLength": 2,
                    "default": "fe",
                    "position": 1
                },
                "is_on": {
                    "type": "boolean",
                    "position": 5
                },
                "end": {
                    "type": "string",
                    "format": "binary",
                    "minLength": 2,
                    "maxLength": 2,
                    "default": "ef",
                    "position": 10
                }
            },
            "required": ["is_on"]
        });

        // The same document has to be a valid JSON schema and a valid BDS.
        let mut scope = json_schema::Scope::new();
        let j_schema = scope.compile_and_return(schema.clone(), false)?;
        let schema: DataSchema = from_value(schema)?;

        let input = json!({ "is_on": true });
        assert!(j_schema.validate(&input).is_valid());

        let mut encoded = Vec::new();
        schema.encode(&mut encoded, &input)?;
        let wire = [0xfe, 1, 0xef];
        assert_eq!(&wire, encoded.as_slice());

        let mut cursor = std::io::Cursor::new(encoded);
        let back = schema.decode(&mut cursor)?;
        let complete = json!({
            "start": "fe",
            "is_on": true,
            "end": "ef"
        });
        assert!(j_schema.validate(&back).is_valid());
        assert_eq!(back, complete);

        Ok(())
    }
}