binary_data_schema/util.rs
1//! Utility stuff.
2
3use serde::Deserialize;
4use serde_json::Value;
5
6use crate::integer::IntegerSchema;
7
8/// Length encoding like present in the JSON description.
9#[derive(Debug, Clone, Deserialize)]
10#[serde(tag = "@type", rename_all = "lowercase")]
11pub enum RawLengthEncoding {
12 Fixed,
13 ExplicitLength(IntegerSchema),
14 EndPattern { sentinel: Value },
15 Capacity { padding: Value },
16 TillEnd,
17}
18
19/// The way the length of a variable length field is specified in an [array](crate::ArraySchema) or [string schema](crate::StringSchema).
20///
21/// The length encoding is usually given explicit with the parameter `"lengthEncoding"` in a schema.
22/// A length encoding is always a JSON object with a field `"type"`.
23/// The default length encoding is `"tillend"`.
24///
25/// # Length of Array and String
26///
27/// Depending on the surrounding schema the length encoded refers to different things.
28/// In an array schema the length denotes the number of elements in the encoded array.
29/// In a string array the length means the number of bytes required to store the UTF-8 encoded string.
30///
31///
32///
33#[derive(Debug, Clone)]
34pub enum LengthEncoding<T> {
35 /// Fixed Size → `"@type": "fixed"`.
36 ///
37 /// The encoded length is always the same.
38 /// Only values with the fixed length are valid for this schema.
39 ///
40 /// # Definition in the Schema
41 ///
42 /// In contrast to other length encodings `fixed` is may not be given explicitly.
43 /// Instead it requires `"minLength"` and `"maxLength"` for string and `"minItems"` and `"maxItems"` for array schemata be set to the same value.
44 ///
45 /// If `fixed` is given explicitly but `min` and `max` are different or one is missing the length encoding is not valid.
46 ///
47 /// ## Examples
48 ///
49 /// ```
50 /// # use binary_data_schema::{LengthEncoding, string::StringSchema};
51 /// # use serde_json::{json, from_value};
52 /// let schema = json!({
53 /// "type": "string",
54 /// "minLength": 2,
55 /// "maxLength": 2
56 /// });
57 /// let schema = from_value::<StringSchema>(schema)?;
58 /// assert!(matches!(schema, StringSchema::Utf8 { length: LengthEncoding::Fixed(2) }));
59 /// # Ok::<(), anyhow::Error>(())
60 /// ```
61 ///
62 /// ```
63 /// # use binary_data_schema::{LengthEncoding, array::ArraySchema};
64 /// # use serde_json::{json, from_value};
65 /// let schema = json!({
66 /// "type": "array",
67 /// "minItems": 2,
68 /// "lengthEncoding": { "@type": "fixed" },
69 /// "items": { "type": "boolean" }
70 /// });
71 /// assert!(from_value::<ArraySchema>(schema).is_err());
72 /// # Ok::<(), anyhow::Error>(())
73 /// ```
74 Fixed(usize),
75 /// Length Encoded → `"@type": "explicitlength"`.
76 ///
77 /// Length encoded means that the length of the value encoded is encoded at the beginning of the field.
78 /// How the length is encoded is defined via an integer schema.
79 ///
80 /// # Example
81 ///
82 /// ```
83 /// # use binary_data_schema::*;
84 /// # use valico::json_schema;
85 /// # use serde_json::{json, from_value};
86 /// let schema = json!({
87 /// "type": "string",
88 /// "lengthEncoding": {
89 /// "@type": "explicitlength",
90 /// "length": 1
91 /// }
92 /// });
93 ///
94 /// let mut scope = json_schema::Scope::new();
95 /// let j_schema = scope.compile_and_return(schema.clone(), false)?;
96 /// let schema = from_value::<DataSchema>(schema)?;
97 ///
98 /// let value = json!("tree");
99 /// assert!(j_schema.validate(&value).is_valid());
100 /// let mut encoded = Vec::new();
101 /// schema.encode(&mut encoded, &value)?;
102 /// let expected = [ 4, b't', b'r', b'e', b'e' ];
103 /// assert_eq!(&expected, encoded.as_slice());
104 ///
105 /// let mut encoded = std::io::Cursor::new(encoded);
106 /// let back = schema.decode(&mut encoded)?;
107 /// assert!(j_schema.validate(&back).is_valid());
108 /// assert_eq!(back, value);
109 /// # Ok::<(), anyhow::Error>(())
110 /// ```
111 LengthEncoded(IntegerSchema),
112 /// End Pattern → `"@type": "endpattern"`.
113 ///
114 /// With End Pattern the end of a variable length string or array is marked by a sentinel value.
115 /// The sentinel must be valid for the surrounding schema, i.e. for string schema it must adhere to the `"format"` and for array schema it must be valid for the `"items"` schema.
116 /// Furthermore, for string schema the sentinel must be encoded as one byte, e.g. an ASCII letter or if `"format": "binary"` two hex-digits.
117 ///
118 /// **Note:** The sentinel value is not allowed to be included in an encoded value!
119 ///
120 /// **Note2:** The sentinel values should bot be used with arrays with numeric items. The sentinel is tested for equality so rounding errors may result in not being able to identify the sentinel!
121 ///
122 /// # Examples
123 ///
124 /// ```
125 /// # use binary_data_schema::*;
126 /// # use valico::json_schema;
127 /// # use serde_json::{json, from_value};
128 /// let schema = json!({
129 /// "type": "string",
130 /// "lengthEncoding": {
131 /// "@type": "endpattern",
132 /// "sentinel": "!"
133 /// }
134 /// });
135 ///
136 /// let mut scope = json_schema::Scope::new();
137 /// let j_schema = scope.compile_and_return(schema.clone(), false)?;
138 /// let schema = from_value::<DataSchema>(schema)?;
139 ///
140 /// let value = json!("tree");
141 /// assert!(j_schema.validate(&value).is_valid());
142 /// let mut encoded = Vec::new();
143 /// schema.encode(&mut encoded, &value)?;
144 /// let expected = [ b't', b'r', b'e', b'e', b'!' ];
145 /// assert_eq!(&expected, encoded.as_slice());
146 ///
147 /// let mut encoded = std::io::Cursor::new(encoded);
148 /// let back = schema.decode(&mut encoded)?;
149 /// assert!(j_schema.validate(&back).is_valid());
150 /// assert_eq!(back, value);
151 /// # Ok::<(), anyhow::Error>(())
152 /// ```
153 ///
154 /// ```
155 /// # use binary_data_schema::*;
156 /// # use valico::json_schema;
157 /// # use serde_json::{json, from_value};
158 /// let schema = json!({
159 /// "type": "string",
160 /// "format": "binary",
161 /// "lengthEncoding": {
162 /// "@type": "endpattern",
163 /// "sentinel": "00"
164 /// }
165 /// });
166 ///
167 /// let mut scope = json_schema::Scope::new();
168 /// let j_schema = scope.compile_and_return(schema.clone(), false)?;
169 /// let schema = from_value::<DataSchema>(schema)?;
170 ///
171 /// let value = json!("beef");
172 /// assert!(j_schema.validate(&value).is_valid());
173 /// let mut encoded = Vec::new();
174 /// schema.encode(&mut encoded, &value)?;
175 /// let expected = [ 0xbe, 0xef, 0x00 ];
176 /// assert_eq!(&expected, encoded.as_slice());
177 ///
178 /// let mut encoded = std::io::Cursor::new(encoded);
179 /// let back = schema.decode(&mut encoded)?;
180 /// assert!(j_schema.validate(&back).is_valid());
181 /// assert_eq!(back, value);
182 /// # Ok::<(), anyhow::Error>(())
183 /// ```
184 ///
185 /// ```
186 /// # use binary_data_schema::*;
187 /// # use valico::json_schema;
188 /// # use serde_json::{json, from_value};
189 /// let schema = json!({
190 /// "type": "array",
191 /// "lengthEncoding": {
192 /// "@type": "endpattern",
193 /// "sentinel": false
194 /// },
195 /// "items": { "type": "boolean" }
196 /// });
197 ///
198 /// let mut scope = json_schema::Scope::new();
199 /// let j_schema = scope.compile_and_return(schema.clone(), false)?;
200 /// let schema = from_value::<DataSchema>(schema)?;
201 ///
202 /// let value = json!([ true, true, false ]);
203 /// let mut encoded = Vec::new();
204 /// // value contains the sentinel value `false`
205 /// assert!(schema.encode(&mut encoded, &value).is_err());
206 /// # Ok::<(), anyhow::Error>(())
207 /// ```
208 EndPattern { sentinel: T },
209 /// Capacity → `"@type": "capacity"`.
210 ///
211 /// With capacity there is always a certain capacity reserved for the field in the encoded byte string.
212 /// For string schemata the capacity is defined by `"maxLength"` where it refers to the maximal length of string that can be encoded.
213 /// For array schemata `"maxItems"` defines the capacity.
214 ///
215 /// Unused space is filled with `"padding"`.
216 /// The padding must fulfill the same requirements as the `"sentinel"` of end pattern.
217 ///
218 /// A value may consume the whole reserved space.
219 /// In this case no padding is inserted.
220 ///
221 /// # Example
222 ///
223 /// ```
224 /// # use binary_data_schema::*;
225 /// # use valico::json_schema;
226 /// # use serde_json::{json, from_value};
227 /// let schema = json!({
228 /// "type": "string",
229 /// "format": "binary",
230 /// "maxLength": 8,
231 /// "lengthEncoding": {
232 /// "@type": "capacity",
233 /// "padding": "00"
234 /// }
235 /// });
236 ///
237 /// let mut scope = json_schema::Scope::new();
238 /// let j_schema = scope.compile_and_return(schema.clone(), false)?;
239 /// let schema = from_value::<DataSchema>(schema)?;
240 ///
241 /// let value = json!("beef");
242 /// assert!(j_schema.validate(&value).is_valid());
243 /// let mut encoded = Vec::new();
244 /// schema.encode(&mut encoded, &value)?;
245 /// let expected = [ 0xbe, 0xef, 0x00, 0x00 ];
246 /// assert_eq!(&expected, encoded.as_slice());
247 ///
248 /// let mut encoded = std::io::Cursor::new(encoded);
249 /// let back = schema.decode(&mut encoded)?;
250 /// assert!(j_schema.validate(&back).is_valid());
251 /// assert_eq!(back, value);
252 /// # Ok::<(), anyhow::Error>(())
253 /// ```
254 Capacity { padding: T, capacity: usize },
255 /// Till End → `"@type": "tillend"`.
256 ///
257 /// Till End is the default length encoding.
258 /// It simply means that the end of an array or string is determined by the end of the byte string.
259 /// Accordingly, schemata with `"tillend"` length encoding are only allowed as the last property in an object schema.
260 TillEnd,
261}
262
263impl Default for RawLengthEncoding {
264 fn default() -> Self {
265 RawLengthEncoding::TillEnd
266 }
267}
268
269impl<T> LengthEncoding<T> {
270 pub fn fixed(fixed: usize) -> Self {
271 Self::Fixed(fixed)
272 }
273 pub fn length_encoded(schema: IntegerSchema) -> Self {
274 Self::LengthEncoded(schema)
275 }
276 pub fn end_pattern(pattern: T) -> Self {
277 Self::EndPattern { sentinel: pattern }
278 }
279 pub fn capacity(padding: T, capacity: usize) -> Self {
280 Self::Capacity { padding, capacity }
281 }
282 pub fn map<R, F>(self, f: F) -> LengthEncoding<R>
283 where
284 F: FnOnce(T) -> R,
285 {
286 match self {
287 Self::Fixed(fixed) => LengthEncoding::Fixed(fixed),
288 Self::LengthEncoded(schema) => LengthEncoding::LengthEncoded(schema),
289 Self::EndPattern { sentinel: pattern } => LengthEncoding::EndPattern {
290 sentinel: f(pattern),
291 },
292 Self::Capacity { padding, capacity } => LengthEncoding::Capacity {
293 padding: f(padding),
294 capacity,
295 },
296 Self::TillEnd => LengthEncoding::TillEnd,
297 }
298 }
299 pub fn try_map<R, E, F>(self, f: F) -> Result<LengthEncoding<R>, E>
300 where
301 F: FnOnce(T) -> Result<R, E>,
302 {
303 let new = match self {
304 Self::Fixed(fixed) => LengthEncoding::Fixed(fixed),
305 Self::LengthEncoded(schema) => LengthEncoding::LengthEncoded(schema),
306 Self::EndPattern { sentinel: pattern } => LengthEncoding::EndPattern {
307 sentinel: f(pattern)?,
308 },
309 Self::Capacity { padding, capacity } => LengthEncoding::Capacity {
310 padding: f(padding)?,
311 capacity,
312 },
313 Self::TillEnd => LengthEncoding::TillEnd,
314 };
315 Ok(new)
316 }
317}