binary_data_schema/
array.rs

1//! Implementation of the array schema
2//!
3//! # Length of an Array
4//!
5//! The length of an array is the number of elements stored.
6//!
7//! # Parameters
8//!
9//! | Key           | Type     | Default  | Comment |
10//! | ------------- | --------:| --------:| ------- |
//! | `"lengthEncoding"` | `object` | `{ "@type": "tillend" }` | The way the length of the array is communicated |
12//! | `"minItems"`  |   `uint` | optional | Minimal number of items in the array |
13//! | `"maxItems"`  |   `uint` | optional | Maximal number of items in the array |
14//! | `"items"`     | data schema | required | Schema validating the elements of the array |
15//!
16//! ## Validation
17//!
18//! `"lengthEncoding"` has its own validation rules (see [`LengthEncoding`](crate::LengthEncoding)).
19//! This also includes the validity of the values of `"minItems"` and `"maxItems"`.
20//!
21//! In contrast to JSON schema, BDS does not support [tuples].
22//! Accordingly, it is only allowed to have a single data schema as the value of `"items"`.
23//!
24//! # Features
25//!
//! Apart from the length encoding that array schemata share with string schemata,
27//! there are no special features implemented for array schemata.
28//! Neither [tuple validation] nor [uniqueness].
29//!
30//! [tuples]: https://json-schema.org/understanding-json-schema/reference/array.html#tuple-validation
31//! [tuple validation]: https://json-schema.org/understanding-json-schema/reference/array.html#tuple-validation
32//! [uniqueness]: https://json-schema.org/understanding-json-schema/reference/array.html#uniqueness
33
34use std::{convert::TryFrom, io};
35
36use byteorder::{ReadBytesExt, WriteBytesExt};
37use serde::{
38    de::{Deserializer, Error as DeError},
39    Deserialize,
40};
41use serde_json::Value;
42
43use crate::{
44    integer::{self as int, IntegerSchema},
45    util::*,
46    DataSchema, Decoder, Encoder, Error, Result,
47};
48
49/// Errors validating an [ArraySchema].
50#[derive(Debug, thiserror::Error)]
51pub enum ValidationError {
52    #[error("A fixed length array schema requires both 'maxItems' and 'minItems' given and having the same value")]
53    IncompleteFixedLength,
54    #[error("Patterns and/or paddings must be encodable with the given schema: '{value}' can not be encoded with a {type_} schema: {error}")]
55    InvalidPatternOrPadding {
56        value: Value,
57        type_: &'static str,
58        error: Box<Error>,
59    },
60    #[error("Length encoding 'capacity' requires 'maxItems'")]
61    MissingCapacity,
62}
63
64/// Errors encoding a string with an [ArraySchema].
65#[derive(Debug, thiserror::Error)]
66pub enum EncodingError {
67    #[error("The value '{value}' can not be encoded with an array schema")]
68    InvalidValue { value: String },
69    #[error("Writing to buffer failed: {0}")]
70    WriteFail(#[from] io::Error),
71    #[error("Could not encode length: {0}")]
72    EncodingLength(#[from] int::EncodingError),
73    #[error("Encoding sub-schema failed: {0}")]
74    SubSchema(Box<Error>),
75    #[error("{len} elements in array but only a fixed number of {fixed} elements is supported")]
76    NotFixedLength { len: usize, fixed: usize },
77    #[error("{len} elements in the array but only a length up to {max} elementy can be encoded")]
78    ExceedsLengthEncoding { len: usize, max: usize },
79    #[error("Array contains the end pattern or the padding {0}")]
80    ContainsPatternOrPadding(Value),
81    #[error("{len} elements in array but only values up to {cap} elements are valid")]
82    ExceedsCapacity { len: usize, cap: usize },
83}
84
85impl From<Error> for EncodingError {
86    fn from(e: Error) -> Self {
87        EncodingError::SubSchema(Box::new(e))
88    }
89}
90
91/// Errors decoding a string with an [ArraySchema].
92#[derive(Debug, thiserror::Error)]
93pub enum DecodingError {
94    #[error("Reading encoded data failed: {0}")]
95    ReadFail(#[from] io::Error),
96    #[error("Decoding sub-schema failed: {0}")]
97    SubSchema(Box<Error>),
98    #[error("Could not deencode length: {0}")]
99    DecodingLength(#[from] int::DecodingError),
100}
101
102impl From<Error> for DecodingError {
103    fn from(e: Error) -> Self {
104        DecodingError::SubSchema(Box::new(e))
105    }
106}
107
108impl DecodingError {
109    pub fn due_to_eof(&self) -> bool {
110        matches!(self, Self::ReadFail(e) if e.kind() == std::io::ErrorKind::UnexpectedEof)
111    }
112}
113
114/// How is the length of variable sized data encoded.
115#[derive(Debug, Clone, Deserialize)]
116#[serde(rename_all = "camelCase")]
117struct RawArray {
118    #[serde(default)]
119    length_encoding: RawLengthEncoding,
120    max_items: Option<usize>,
121    min_items: Option<usize>,
122    items: DataSchema,
123}
124
125/// The array schema to describe arrays of homogeneous elements (further information on [the module's documentation](index.html)).
126///
127/// Contrary to the JSON schema's array schema tuples are not supported.
128#[derive(Debug, Clone)]
129pub struct ArraySchema {
130    pub(crate) length: LengthEncoding<Value>,
131    items: DataSchema,
132}
133
134fn validate_value(value: &Value, schema: &DataSchema) -> Result<(), ValidationError> {
135    let mut buf = Vec::new();
136    match schema.encode(&mut buf, value) {
137        Ok(_) => Ok(()),
138        Err(e) => Err(ValidationError::InvalidPatternOrPadding {
139            value: value.clone(),
140            type_: schema.type_(),
141            error: Box::new(e),
142        }),
143    }
144}
145
146impl TryFrom<RawArray> for ArraySchema {
147    type Error = ValidationError;
148
149    fn try_from(raw: RawArray) -> Result<Self, Self::Error> {
150        let schema = raw.items;
151        let length = match (raw.min_items, raw.max_items) {
152            (Some(min), Some(max)) if min == max => Ok(LengthEncoding::Fixed(min)),
153            _ => match raw.length_encoding {
154                RawLengthEncoding::Fixed => Err(ValidationError::IncompleteFixedLength),
155                RawLengthEncoding::ExplicitLength(schema) => {
156                    Ok(LengthEncoding::LengthEncoded(schema))
157                }
158                RawLengthEncoding::EndPattern { sentinel: pattern } => {
159                    validate_value(&pattern, &schema)?;
160                    Ok(LengthEncoding::EndPattern { sentinel: pattern })
161                }
162                RawLengthEncoding::Capacity { padding } => {
163                    let capacity = raw.max_items.ok_or(ValidationError::MissingCapacity)?;
164                    validate_value(&padding, &schema)?;
165                    Ok(LengthEncoding::Capacity { padding, capacity })
166                }
167                RawLengthEncoding::TillEnd => Ok(LengthEncoding::TillEnd),
168            },
169        }?;
170
171        Ok(Self {
172            length,
173            items: schema,
174        })
175    }
176}
177
178impl<'de> Deserialize<'de> for ArraySchema {
179    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
180    where
181        D: Deserializer<'de>,
182    {
183        let raw = RawArray::deserialize(deserializer)?;
184        ArraySchema::try_from(raw).map_err(D::Error::custom)
185    }
186}
187
188impl ArraySchema {
189    pub fn byte_array(length: LengthEncoding<Value>) -> Result<Self, ValidationError> {
190        let byte_schema = IntegerSchema::unsigned_byte().into();
191        match &length {
192            LengthEncoding::EndPattern { sentinel: value }
193            | LengthEncoding::Capacity { padding: value, .. } => {
194                validate_value(value, &byte_schema)?;
195            }
196            _ => {}
197        }
198
199        Ok(Self {
200            length,
201            items: byte_schema,
202        })
203    }
204    fn valid_slice(&self, slice: &[Value]) -> Result<(), EncodingError> {
205        match &self.length {
206            LengthEncoding::Fixed(length) => {
207                if slice.len() != *length {
208                    Err(EncodingError::NotFixedLength {
209                        len: slice.len(),
210                        fixed: *length,
211                    })
212                } else {
213                    Ok(())
214                }
215            }
216            LengthEncoding::LengthEncoded(schema) => {
217                if schema.max_value() < slice.len() {
218                    Err(EncodingError::ExceedsLengthEncoding {
219                        len: slice.len(),
220                        max: schema.max_value(),
221                    })
222                } else {
223                    Ok(())
224                }
225            }
226            LengthEncoding::EndPattern { sentinel: pattern } => {
227                if slice.iter().any(|v| v == pattern) {
228                    Err(EncodingError::ContainsPatternOrPadding(pattern.clone()))
229                } else {
230                    Ok(())
231                }
232            }
233            LengthEncoding::Capacity { padding, capacity } => {
234                if *capacity < slice.len() {
235                    Err(EncodingError::ExceedsCapacity {
236                        len: slice.len(),
237                        cap: *capacity,
238                    })
239                } else if slice.iter().any(|v| v == padding) {
240                    Err(EncodingError::ContainsPatternOrPadding(padding.clone()))
241                } else {
242                    Ok(())
243                }
244            }
245            LengthEncoding::TillEnd => Ok(()),
246        }
247    }
248}
249
250impl Encoder for ArraySchema {
251    type Error = EncodingError;
252
253    fn encode<W>(&self, target: &mut W, value: &Value) -> Result<usize, Self::Error>
254    where
255        W: io::Write + WriteBytesExt,
256    {
257        let value = value
258            .as_array()
259            .ok_or_else(|| EncodingError::InvalidValue {
260                value: value.to_string(),
261            })?;
262        let len = value.len();
263        self.valid_slice(value)?;
264
265        let mut written = 0;
266        // pre-value
267        if let LengthEncoding::LengthEncoded(schema) = &self.length {
268            let len = len as u64;
269            written += schema.encode(target, &(len.into()))?;
270        }
271        // write array
272        for v in value.iter() {
273            written += self.items.encode(target, v)?;
274        }
275        // post-value
276        match &self.length {
277            LengthEncoding::EndPattern { sentinel } => {
278                written += self.items.encode(target, sentinel)?;
279            }
280            LengthEncoding::Capacity { padding, capacity } => {
281                let left = *capacity - len;
282                for _ in 0..left {
283                    written += self.items.encode(target, padding)?;
284                }
285            }
286            _ => {}
287        }
288
289        Ok(written)
290    }
291}
292
293impl Decoder for ArraySchema {
294    type Error = DecodingError;
295
296    fn decode<R>(&self, target: &mut R) -> Result<Value, Self::Error>
297    where
298        R: io::Read + ReadBytesExt,
299    {
300        let elements = match &self.length {
301            LengthEncoding::Fixed(len) => (0..*len)
302                .map(|_| self.items.decode(target))
303                .collect::<Result<Vec<_>, _>>()?,
304            LengthEncoding::LengthEncoded(schema) => {
305                let len = schema
306                    .decode(target)?
307                    .as_u64()
308                    .expect("counts are always unsigned ints");
309                (0..len)
310                    .map(|_| self.items.decode(target))
311                    .collect::<Result<Vec<_>, _>>()?
312            }
313            LengthEncoding::EndPattern { sentinel: pattern } => {
314                let mut elements = Vec::new();
315                loop {
316                    let element = self.items.decode(target)?;
317                    if element != *pattern {
318                        elements.push(element);
319                    } else {
320                        break;
321                    }
322                }
323                elements
324            }
325            LengthEncoding::Capacity {
326                padding: pattern,
327                capacity,
328            } => {
329                let mut elements = Vec::new();
330                for _ in 0..*capacity {
331                    let element = self.items.decode(target)?;
332                    if element != *pattern {
333                        elements.push(element);
334                    } else {
335                        break;
336                    }
337                }
338                elements
339            }
340            LengthEncoding::TillEnd => {
341                let mut elements = Vec::new();
342                loop {
343                    match self.items.decode(target) {
344                        Ok(element) => elements.push(element),
345                        Err(e) if e.due_to_eof() => break,
346                        Err(e) => return Err(e.into()),
347                    }
348                }
349                elements
350            }
351        };
352
353        Ok(elements.into())
354    }
355}
356
#[cfg(test)]
mod test {
    use super::*;
    use anyhow::Result;
    use serde_json::{from_value, json};

    /// `"items"` is required, so an empty object is not a valid array schema.
    #[test]
    fn default() -> Result<()> {
        let parsed = from_value::<ArraySchema>(json!({}));
        assert!(parsed.is_err());
        Ok(())
    }

    /// Without any length information the array runs till the end of the data.
    #[test]
    fn schema_only() -> Result<()> {
        let description = json!({
            "items": {
                "type": "boolean"
            }
        });
        let schema: ArraySchema = from_value(description)?;
        let runs_till_end = matches!(
            schema,
            ArraySchema {
                length: LengthEncoding::TillEnd,
                ..
            }
        );
        assert!(runs_till_end);
        Ok(())
    }

    /// Equal `"minItems"` and `"maxItems"` yield a fixed length array.
    #[test]
    fn fixed() -> Result<()> {
        let description = json!({
            "minItems": 2,
            "maxItems": 2,
            "items": {
                "type": "boolean"
            }
        });
        let schema: ArraySchema = from_value(description)?;
        let is_fixed = matches!(
            schema,
            ArraySchema {
                length: LengthEncoding::Fixed { .. },
                ..
            }
        );
        assert!(is_fixed);

        let mut buffer = vec![];
        let written = schema.encode(&mut buffer, &json!([false, true]))?;
        assert_eq!(2, written);
        assert_eq!(vec![0u8, 1], buffer);

        Ok(())
    }

    /// An explicit length is written in front of the elements.
    #[test]
    fn length() -> Result<()> {
        let description = json!({
            "lengthEncoding": {
                "@type": "explicitlength",
                "length": 1,
                "signed": false
            },
            "items": {
                "type": "boolean"
            }
        });
        let schema: ArraySchema = from_value(description)?;
        let is_length_encoded = matches!(
            schema,
            ArraySchema {
                length: LengthEncoding::LengthEncoded(_),
                ..
            }
        );
        assert!(is_length_encoded);

        let mut buffer = vec![];
        let written = schema.encode(&mut buffer, &json!([false, true]))?;
        assert_eq!(3, written);
        assert_eq!(vec![2u8, 0, 1], buffer);

        Ok(())
    }
}