binary_data_schema/
util.rs

1//! Utility stuff.
2
3use serde::Deserialize;
4use serde_json::Value;
5
6use crate::integer::IntegerSchema;
7
/// Length encoding as it is written in the JSON description of a schema.
///
/// This is the direct serde representation of the `"lengthEncoding"` object:
/// an internally tagged enum whose variant is selected by the `"@type"` field.
/// Because of `rename_all = "lowercase"` the accepted tags are `"fixed"`,
/// `"explicitlength"`, `"endpattern"`, `"capacity"` and `"tillend"`.
#[derive(Debug, Clone, Deserialize)]
#[serde(tag = "@type", rename_all = "lowercase")]
pub enum RawLengthEncoding {
    /// `"@type": "fixed"` &mdash; carries no further fields.
    Fixed,
    /// `"@type": "explicitlength"` &mdash; the remaining fields of the object
    /// are deserialized as the [`IntegerSchema`] describing the length field.
    ExplicitLength(IntegerSchema),
    /// `"@type": "endpattern"` &mdash; `sentinel` is kept as an arbitrary JSON value here.
    EndPattern { sentinel: Value },
    /// `"@type": "capacity"` &mdash; `padding` is kept as an arbitrary JSON value here.
    /// Note that the capacity itself is not part of this object.
    Capacity { padding: Value },
    /// `"@type": "tillend"` &mdash; carries no further fields.
    TillEnd,
}
18
/// The way the length of a variable length field is specified in an [array](crate::ArraySchema) or [string schema](crate::StringSchema).
///
/// The length encoding is usually given explicitly with the parameter `"lengthEncoding"` in a schema.
/// A length encoding is always a JSON object with a field `"@type"`.
/// The default length encoding is `"tillend"`.
///
/// # Length of Array and String
///
/// Depending on the surrounding schema the encoded length refers to different things.
/// In an array schema the length denotes the number of elements in the encoded array.
/// In a string schema the length means the number of bytes required to store the UTF-8 encoded string.
///
/// # Type Parameter
///
/// `T` is the type of the marker value carried by [`EndPattern`](Self::EndPattern) (the sentinel)
/// and by [`Capacity`](Self::Capacity) (the padding).
#[derive(Debug, Clone)]
pub enum LengthEncoding<T> {
    /// Fixed Size &rarr; `"@type": "fixed"`.
    ///
    /// The encoded length is always the same.
    /// Only values with the fixed length are valid for this schema.
    /// The payload of the variant is that fixed length.
    ///
    /// # Definition in the Schema
    ///
    /// In contrast to other length encodings `fixed` does not have to be given explicitly.
    /// Instead it requires `"minLength"` and `"maxLength"` for string and `"minItems"` and `"maxItems"` for array schemata be set to the same value.
    ///
    /// If `fixed` is given explicitly but `min` and `max` are different or one is missing the length encoding is not valid.
    ///
    /// ## Examples
    ///
    /// ```
    /// # use binary_data_schema::{LengthEncoding, string::StringSchema};
    /// # use serde_json::{json, from_value};
    /// let schema = json!({
    ///     "type": "string",
    ///     "minLength": 2,
    ///     "maxLength": 2
    /// });
    /// let schema = from_value::<StringSchema>(schema)?;
    /// assert!(matches!(schema, StringSchema::Utf8 { length: LengthEncoding::Fixed(2) }));
    /// # Ok::<(), anyhow::Error>(())
    /// ```
    ///
    /// ```
    /// # use binary_data_schema::{LengthEncoding, array::ArraySchema};
    /// # use serde_json::{json, from_value};
    /// let schema = json!({
    ///     "type": "array",
    ///     "minItems": 2,
    ///     "lengthEncoding": { "@type": "fixed" },
    ///     "items": { "type": "boolean" }
    /// });
    /// assert!(from_value::<ArraySchema>(schema).is_err());
    /// # Ok::<(), anyhow::Error>(())
    /// ```
    Fixed(usize),
    /// Length Encoded &rarr; `"@type": "explicitlength"`.
    ///
    /// Length encoded means that the length of the encoded value is stored at the beginning of the field.
    /// How the length is encoded is defined via an integer schema.
    ///
    /// # Example
    ///
    /// ```
    /// # use binary_data_schema::*;
    /// # use valico::json_schema;
    /// # use serde_json::{json, from_value};
    /// let schema = json!({
    ///     "type": "string",
    ///     "lengthEncoding": {
    ///         "@type": "explicitlength",
    ///         "length": 1
    ///     }
    /// });
    ///
    /// let mut scope = json_schema::Scope::new();
    /// let j_schema = scope.compile_and_return(schema.clone(), false)?;
    /// let schema = from_value::<DataSchema>(schema)?;
    ///
    /// let value = json!("tree");
    /// assert!(j_schema.validate(&value).is_valid());
    /// let mut encoded = Vec::new();
    /// schema.encode(&mut encoded, &value)?;
    /// let expected = [ 4, b't', b'r', b'e', b'e' ];
    /// assert_eq!(&expected, encoded.as_slice());
    ///
    /// let mut encoded = std::io::Cursor::new(encoded);
    /// let back = schema.decode(&mut encoded)?;
    /// assert!(j_schema.validate(&back).is_valid());
    /// assert_eq!(back, value);
    /// # Ok::<(), anyhow::Error>(())
    /// ```
    LengthEncoded(IntegerSchema),
    /// End Pattern &rarr; `"@type": "endpattern"`.
    ///
    /// With End Pattern the end of a variable length string or array is marked by a sentinel value.
    /// The sentinel must be valid for the surrounding schema, i.e. for string schema it must adhere to the `"format"` and for array schema it must be valid for the `"items"` schema.
    /// Furthermore, for string schema the sentinel must be encoded as one byte, e.g. an ASCII letter or if `"format": "binary"` two hex-digits.
    ///
    /// **Note:** The sentinel value is not allowed to be included in an encoded value!
    ///
    /// **Note2:** The sentinel values should not be used with arrays with numeric items. The sentinel is tested for equality so rounding errors may result in not being able to identify the sentinel!
    ///
    /// # Examples
    ///
    /// ```
    /// # use binary_data_schema::*;
    /// # use valico::json_schema;
    /// # use serde_json::{json, from_value};
    /// let schema = json!({
    ///     "type": "string",
    ///     "lengthEncoding": {
    ///         "@type": "endpattern",
    ///         "sentinel": "!"
    ///     }
    /// });
    ///
    /// let mut scope = json_schema::Scope::new();
    /// let j_schema = scope.compile_and_return(schema.clone(), false)?;
    /// let schema = from_value::<DataSchema>(schema)?;
    ///
    /// let value = json!("tree");
    /// assert!(j_schema.validate(&value).is_valid());
    /// let mut encoded = Vec::new();
    /// schema.encode(&mut encoded, &value)?;
    /// let expected = [ b't', b'r', b'e', b'e', b'!' ];
    /// assert_eq!(&expected, encoded.as_slice());
    ///
    /// let mut encoded = std::io::Cursor::new(encoded);
    /// let back = schema.decode(&mut encoded)?;
    /// assert!(j_schema.validate(&back).is_valid());
    /// assert_eq!(back, value);
    /// # Ok::<(), anyhow::Error>(())
    /// ```
    ///
    /// ```
    /// # use binary_data_schema::*;
    /// # use valico::json_schema;
    /// # use serde_json::{json, from_value};
    /// let schema = json!({
    ///     "type": "string",
    ///     "format": "binary",
    ///     "lengthEncoding": {
    ///         "@type": "endpattern",
    ///         "sentinel": "00"
    ///     }
    /// });
    ///
    /// let mut scope = json_schema::Scope::new();
    /// let j_schema = scope.compile_and_return(schema.clone(), false)?;
    /// let schema = from_value::<DataSchema>(schema)?;
    ///
    /// let value = json!("beef");
    /// assert!(j_schema.validate(&value).is_valid());
    /// let mut encoded = Vec::new();
    /// schema.encode(&mut encoded, &value)?;
    /// let expected = [ 0xbe, 0xef, 0x00 ];
    /// assert_eq!(&expected, encoded.as_slice());
    ///
    /// let mut encoded = std::io::Cursor::new(encoded);
    /// let back = schema.decode(&mut encoded)?;
    /// assert!(j_schema.validate(&back).is_valid());
    /// assert_eq!(back, value);
    /// # Ok::<(), anyhow::Error>(())
    /// ```
    ///
    /// ```
    /// # use binary_data_schema::*;
    /// # use valico::json_schema;
    /// # use serde_json::{json, from_value};
    /// let schema = json!({
    ///     "type": "array",
    ///     "lengthEncoding": {
    ///         "@type": "endpattern",
    ///         "sentinel": false
    ///     },
    ///     "items": { "type": "boolean" }
    /// });
    ///
    /// let mut scope = json_schema::Scope::new();
    /// let j_schema = scope.compile_and_return(schema.clone(), false)?;
    /// let schema = from_value::<DataSchema>(schema)?;
    ///
    /// let value = json!([ true, true, false ]);
    /// let mut encoded = Vec::new();
    /// // value contains the sentinel value `false`
    /// assert!(schema.encode(&mut encoded, &value).is_err());
    /// # Ok::<(), anyhow::Error>(())
    /// ```
    EndPattern { sentinel: T },
    /// Capacity &rarr; `"@type": "capacity"`.
    ///
    /// With capacity there is always a certain capacity reserved for the field in the encoded byte string.
    /// For string schemata the capacity is defined by `"maxLength"` where it refers to the maximal length of string that can be encoded.
    /// For array schemata `"maxItems"` defines the capacity.
    ///
    /// Unused space is filled with `"padding"`.
    /// The padding must fulfill the same requirements as the `"sentinel"` of end pattern.
    ///
    /// A value may consume the whole reserved space.
    /// In this case no padding is inserted.
    ///
    /// # Example
    ///
    /// ```
    /// # use binary_data_schema::*;
    /// # use valico::json_schema;
    /// # use serde_json::{json, from_value};
    /// let schema = json!({
    ///     "type": "string",
    ///     "format": "binary",
    ///     "maxLength": 8,
    ///     "lengthEncoding": {
    ///         "@type": "capacity",
    ///         "padding": "00"
    ///     }
    /// });
    ///
    /// let mut scope = json_schema::Scope::new();
    /// let j_schema = scope.compile_and_return(schema.clone(), false)?;
    /// let schema = from_value::<DataSchema>(schema)?;
    ///
    /// let value = json!("beef");
    /// assert!(j_schema.validate(&value).is_valid());
    /// let mut encoded = Vec::new();
    /// schema.encode(&mut encoded, &value)?;
    /// let expected = [ 0xbe, 0xef, 0x00, 0x00 ];
    /// assert_eq!(&expected, encoded.as_slice());
    ///
    /// let mut encoded = std::io::Cursor::new(encoded);
    /// let back = schema.decode(&mut encoded)?;
    /// assert!(j_schema.validate(&back).is_valid());
    /// assert_eq!(back, value);
    /// # Ok::<(), anyhow::Error>(())
    /// ```
    Capacity { padding: T, capacity: usize },
    /// Till End &rarr; `"@type": "tillend"`.
    ///
    /// Till End is the default length encoding.
    /// It simply means that the end of an array or string is determined by the end of the byte string.
    /// Accordingly, schemata with `"tillend"` length encoding are only allowed as the last property in an object schema.
    TillEnd,
}
262
263impl Default for RawLengthEncoding {
264    fn default() -> Self {
265        RawLengthEncoding::TillEnd
266    }
267}
268
269impl<T> LengthEncoding<T> {
270    pub fn fixed(fixed: usize) -> Self {
271        Self::Fixed(fixed)
272    }
273    pub fn length_encoded(schema: IntegerSchema) -> Self {
274        Self::LengthEncoded(schema)
275    }
276    pub fn end_pattern(pattern: T) -> Self {
277        Self::EndPattern { sentinel: pattern }
278    }
279    pub fn capacity(padding: T, capacity: usize) -> Self {
280        Self::Capacity { padding, capacity }
281    }
282    pub fn map<R, F>(self, f: F) -> LengthEncoding<R>
283    where
284        F: FnOnce(T) -> R,
285    {
286        match self {
287            Self::Fixed(fixed) => LengthEncoding::Fixed(fixed),
288            Self::LengthEncoded(schema) => LengthEncoding::LengthEncoded(schema),
289            Self::EndPattern { sentinel: pattern } => LengthEncoding::EndPattern {
290                sentinel: f(pattern),
291            },
292            Self::Capacity { padding, capacity } => LengthEncoding::Capacity {
293                padding: f(padding),
294                capacity,
295            },
296            Self::TillEnd => LengthEncoding::TillEnd,
297        }
298    }
299    pub fn try_map<R, E, F>(self, f: F) -> Result<LengthEncoding<R>, E>
300    where
301        F: FnOnce(T) -> Result<R, E>,
302    {
303        let new = match self {
304            Self::Fixed(fixed) => LengthEncoding::Fixed(fixed),
305            Self::LengthEncoded(schema) => LengthEncoding::LengthEncoded(schema),
306            Self::EndPattern { sentinel: pattern } => LengthEncoding::EndPattern {
307                sentinel: f(pattern)?,
308            },
309            Self::Capacity { padding, capacity } => LengthEncoding::Capacity {
310                padding: f(padding)?,
311                capacity,
312            },
313            Self::TillEnd => LengthEncoding::TillEnd,
314        };
315        Ok(new)
316    }
317}