1use std::{convert::TryFrom, io};
112
113use bstr::{ByteSlice, ByteVec as _};
114use byteorder::{ReadBytesExt, WriteBytesExt};
115use serde::{
116 de::{Deserializer, Error as DeError},
117 Deserialize,
118};
119use serde_json::Value;
120
121use crate::{
122 util::{LengthEncoding, RawLengthEncoding},
123 ArraySchema, Decoder, Encoder, IntegerSchema,
124};
125
126#[derive(Debug, thiserror::Error)]
128pub enum ValidationError {
129 #[error("The given end 'sentinel' or 'padding' is not of binary format: {0}")]
130 NotHexPattern(#[from] hex::FromHexError),
131 #[error("'sentinel' and 'padding' are limited to one byte but '{sentinel}' is longer")]
132 InvalidPattern { sentinel: String },
133 #[error("A fixed length string schema requires both 'maxLength' and 'minLength' given and having the same value")]
134 IncompleteFixedLength,
135 #[error("Length encoding 'capacity' requires 'maxLength'")]
136 MissingCapacity,
137 #[error("The provided sentinel or padding '{sentinel}' is not a string")]
138 NotAString { sentinel: Value },
139 #[error("Requested a fixed length or a capacity of {0}: Binary format strings have always an even length as bytes are mapped to two characters")]
140 OddLimit(usize),
141}
142
143#[derive(Debug, thiserror::Error)]
145pub enum EncodingError {
146 #[error("The value '{value}' can not be encoded with a string schema")]
147 InvalidValue { value: String },
148 #[error("Writing to buffer failed: {0}")]
149 WriteFail(#[from] io::Error),
150 #[error("The provided string is not of 'binary' format: {0}")]
151 InvalidHexString(#[from] hex::FromHexError),
152 #[error("Encoding the value length failed: {0}")]
153 LengthSchema(#[from] crate::integer::EncodingError),
154 #[error("Encoding the binary data failed: {0}")]
155 BinaryEncoding(#[from] crate::array::EncodingError),
156 #[error("Length of {len} bytes but only a fixed length of {fixed} is supported")]
157 NotFixedLength { len: usize, fixed: usize },
158 #[error("Contains the end sequence {0}")]
159 ContainsEndSequence(String),
160 #[error("Length of {len} bytes but only values up to a length of {cap} are valid")]
161 ExceedsCapacity { len: usize, cap: usize },
162 #[error("Length of {len} bytes but only a length up to {max} bytes can be encoded")]
163 ExceedsLengthEncoding { len: usize, max: usize },
164}
165
166#[derive(Debug, thiserror::Error)]
168pub enum DecodingError {
169 #[error("Reading encoded data failed: {0}")]
170 ReadFail(#[from] io::Error),
171 #[error("The encoded string is not valid UTF-8: {0}")]
172 NonUtf8STring(#[from] std::string::FromUtf8Error),
173 #[error("The encoded string is not valid UTF-8: {0}")]
174 NonUtf8Bstr(#[from] bstr::FromUtf8Error),
175 #[error("Decoding the value length failed: {0}")]
176 LengthSchema(#[from] crate::integer::DecodingError),
177 #[error("Decoding the binary data failed: {0}")]
178 BinaryDecoding(#[from] crate::array::DecodingError),
179 #[error("The encoded value '{read}' does not contain the endpattern '{pattern}'")]
180 NoPattern { read: String, pattern: String },
181}
182
183impl DecodingError {
184 pub fn due_to_eof(&self) -> bool {
185 match &self {
186 DecodingError::ReadFail(e) => e.kind() == std::io::ErrorKind::UnexpectedEof,
187 DecodingError::LengthSchema(e) => e.due_to_eof(),
188 _ => false,
189 }
190 }
191}
192
193#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)]
194#[serde(rename_all = "lowercase")]
195enum Format {
196 #[serde(skip)]
197 Utf8,
198 Binary,
199}
200
201#[derive(Debug, Clone, Deserialize)]
203#[serde(rename_all = "camelCase")]
204struct RawString {
205 #[serde(default)]
206 length_encoding: RawLengthEncoding,
207 max_length: Option<usize>,
208 min_length: Option<usize>,
209 format: Option<Format>,
210}
211
212#[derive(Debug, Clone)]
214pub enum StringSchema {
215 Utf8 { length: LengthEncoding<String> },
216 Binary { inner: ArraySchema },
217}
218
219impl Format {
220 fn validate_pattern(&self, sentinel: &str) -> Result<(), ValidationError> {
221 match self {
222 Format::Utf8 => {
223 if sentinel.len() == 1 {
224 Ok(())
225 } else {
226 Err(ValidationError::InvalidPattern {
227 sentinel: sentinel.to_owned(),
228 })
229 }
230 }
231 Format::Binary => {
232 let encoded = hex::decode(sentinel)?;
233 if encoded.len() == 1 {
234 Ok(())
235 } else {
236 Err(ValidationError::InvalidPattern {
237 sentinel: sentinel.to_owned(),
238 })
239 }
240 }
241 }
242 }
243}
244
245impl Default for Format {
246 fn default() -> Self {
247 Format::Utf8
248 }
249}
250
251impl TryFrom<RawString> for StringSchema {
252 type Error = ValidationError;
253
254 fn try_from(raw: RawString) -> Result<Self, Self::Error> {
255 let format = raw.format.unwrap_or_default();
256 let length = match (raw.min_length, raw.max_length) {
257 (Some(min), Some(max)) if min == max => Ok(LengthEncoding::Fixed(max)),
258 _ => match raw.length_encoding {
259 RawLengthEncoding::Fixed => Err(ValidationError::IncompleteFixedLength),
260 RawLengthEncoding::ExplicitLength(schema) => {
261 Ok(LengthEncoding::LengthEncoded(schema))
262 }
263 RawLengthEncoding::EndPattern { sentinel } => {
264 let pat_str = sentinel
265 .as_str()
266 .ok_or_else(|| ValidationError::NotAString {
267 sentinel: sentinel.clone(),
268 })?;
269 format.validate_pattern(pat_str)?;
270 Ok(LengthEncoding::EndPattern { sentinel })
271 }
272 RawLengthEncoding::Capacity { padding } => {
273 let capacity = raw.max_length.ok_or(ValidationError::MissingCapacity)?;
274 let pad_str = padding
275 .as_str()
276 .ok_or_else(|| ValidationError::NotAString {
277 sentinel: padding.clone(),
278 })?;
279 format.validate_pattern(pad_str)?;
280 Ok(LengthEncoding::Capacity { capacity, padding })
281 }
282 RawLengthEncoding::TillEnd => Ok(LengthEncoding::TillEnd),
283 },
284 }?;
285 let schema = match format {
286 Format::Utf8 => Self::Utf8 {
287 length: length
288 .map(|v| v.as_str().expect("ensured at length validation").to_owned()),
289 },
290 Format::Binary => {
291 let mut length = length.map(|v| {
292 let string = v.as_str().expect("ensured at length validation");
293 let byte = hex::decode(string).expect("ensured at length validation");
294 byte[0].into()
295 });
296 match &mut length {
297 LengthEncoding::Fixed(number)
298 | LengthEncoding::Capacity {
299 capacity: number, ..
300 } => {
301 if *number % 2 == 1 {
302 return Err(ValidationError::OddLimit(*number));
303 }
304 *number /= 2;
305 }
306 _ => {}
307 };
308 Self::Binary {
309 inner: ArraySchema::byte_array(length).expect("ensured at length validation"),
310 }
311 }
312 };
313 Ok(schema)
314 }
315}
316
317impl<'de> Deserialize<'de> for StringSchema {
318 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
319 where
320 D: Deserializer<'de>,
321 {
322 let raw = RawString::deserialize(deserializer)?;
323 Self::try_from(raw).map_err(D::Error::custom)
324 }
325}
326
327impl Encoder for StringSchema {
328 type Error = EncodingError;
329
330 fn encode<W>(&self, target: &mut W, value: &Value) -> Result<usize, Self::Error>
331 where
332 W: io::Write + WriteBytesExt,
333 {
334 let value = value.as_str().ok_or_else(|| EncodingError::InvalidValue {
335 value: value.to_string(),
336 })?;
337 let written = match &self {
338 StringSchema::Utf8 { length } => match length {
339 LengthEncoding::Fixed(len) => {
340 matches_fixed_len(value, *len)?;
341 target.write_all(value.as_bytes())?;
342 *len
343 }
344 LengthEncoding::LengthEncoded(int) => {
345 exceeds_length(value, int)?;
346 int.encode(target, &value.len().into())?;
347 target.write_all(value.as_bytes())?;
348 value.len() + int.length()
349 }
350 LengthEncoding::EndPattern { sentinel } => {
351 contains_end_sequencs(value, sentinel)?;
352 target.write_all(value.as_bytes())?;
353 target.write_all(sentinel.as_bytes())?;
354 value.len() + sentinel.len()
355 }
356 LengthEncoding::Capacity {
357 padding, capacity, ..
358 } => {
359 exceeds_cap(value, *capacity)?;
360 target.write_all(value.as_bytes())?;
361 fill_rest(target, *capacity, value.len(), padding)?;
362 *capacity
363 }
364 LengthEncoding::TillEnd => {
365 target.write_all(value.as_bytes())?;
366 value.len()
367 }
368 },
369 StringSchema::Binary { inner } => {
370 let value = hex::decode(value)?;
371 inner.encode(target, &(value.into()))?
372 }
373 };
374 Ok(written)
375 }
376}
377
378fn matches_fixed_len(value: &str, len: usize) -> Result<(), EncodingError> {
379 if value.len() != len {
380 Err(EncodingError::NotFixedLength {
381 len: value.len(),
382 fixed: len,
383 })
384 } else {
385 Ok(())
386 }
387}
388
389fn exceeds_length(value: &str, schema: &IntegerSchema) -> Result<(), EncodingError> {
390 if value.len() > schema.max_value() {
391 Err(EncodingError::ExceedsLengthEncoding {
392 len: value.len(),
393 max: schema.max_value(),
394 })
395 } else {
396 Ok(())
397 }
398}
399
400fn contains_end_sequencs(value: &str, pattern: &str) -> Result<(), EncodingError> {
401 if value.contains(&pattern) {
402 Err(EncodingError::ContainsEndSequence(pattern.to_owned()))
403 } else {
404 Ok(())
405 }
406}
407
408fn exceeds_cap(value: &str, cap: usize) -> Result<(), EncodingError> {
409 if value.len() > cap {
410 Err(EncodingError::ExceedsCapacity {
411 len: value.len(),
412 cap,
413 })
414 } else {
415 Ok(())
416 }
417}
418
419fn fill_rest<W: io::Write>(
420 target: W,
421 cap: usize,
422 filled: usize,
423 filler: &str,
424) -> Result<usize, EncodingError> {
425 let mut target = target;
426 let to_fill = cap - filled;
427 for _ in 0..to_fill {
428 target.write_all(filler.as_bytes())?;
430 }
431 Ok(to_fill)
432}
433
434impl Decoder for StringSchema {
435 type Error = DecodingError;
436
437 fn decode<R>(&self, target: &mut R) -> Result<Value, Self::Error>
438 where
439 R: io::Read + ReadBytesExt,
440 {
441 let value = match self {
442 StringSchema::Utf8 { length } => {
443 let bytes = match length {
444 LengthEncoding::Fixed(length) => read_with_length(target, *length)?,
445 LengthEncoding::LengthEncoded(schema) => {
446 let length = schema
447 .decode(target)?
448 .as_u64()
449 .expect("length is always u64");
450 read_with_length(target, length as _)?
451 }
452 LengthEncoding::EndPattern { sentinel: pattern } => {
453 read_with_pattern(target, pattern, usize::MAX)?
454 }
455 LengthEncoding::Capacity { padding, capacity } => {
456 read_with_pattern(target, padding, *capacity)?
457 }
458 LengthEncoding::TillEnd => {
459 let mut buf = Vec::new();
460 target.read_to_end(&mut buf)?;
461 buf
462 }
463 };
464 String::from_utf8(bytes)?.into()
465 }
466 StringSchema::Binary { inner } => {
467 let array = inner.decode(target)?;
468 let bytes = array
469 .as_array()
470 .expect("Is an array schema")
471 .iter()
472 .map(|v| v.as_u64().expect("elements are u8") as _)
473 .collect::<Vec<_>>();
474 let hex_string = hex::encode(bytes);
475 hex_string.into()
476 }
477 };
478
479 Ok(value)
480 }
481}
482
483fn read_with_length<R>(mut reader: R, length: usize) -> Result<Vec<u8>, DecodingError>
484where
485 R: io::Read,
486{
487 let mut buf = vec![0; length];
488 reader.read_exact(buf.as_mut_slice())?;
489 Ok(buf)
490}
491
492fn read_with_pattern<R>(reader: R, pattern: &str, max: usize) -> Result<Vec<u8>, DecodingError>
493where
494 R: io::Read,
495{
496 let mut buf = Vec::new();
497 for b in reader.bytes() {
498 let b = b?;
499 buf.push(b);
500 if buf.ends_with_str(pattern) {
501 buf.pop();
502 return Ok(buf);
503 }
504 if buf.len() == max {
505 return Ok(buf);
506 }
507 }
508
509 Err(DecodingError::NoPattern {
510 read: buf.into_string()?,
511 pattern: pattern.to_owned(),
512 })
513}
514
#[cfg(test)]
mod test {
    use super::*;
    use crate::array::ArraySchema;
    use anyhow::Result;
    use serde_json::{from_value, json};

    /// Encodes `text` with `schema` into a fresh buffer and returns the
    /// reported byte count together with the bytes actually written.
    fn encode_text(schema: &StringSchema, text: &str) -> Result<(usize, Vec<u8>)> {
        let mut buffer = Vec::new();
        let written = schema.encode(&mut buffer, &json!(text))?;
        Ok((written, buffer))
    }

    #[test]
    fn fixed() -> Result<()> {
        // Equal min/max length wins over the requested end pattern.
        let schema: StringSchema = from_value(json!({
            "minLength": 4,
            "maxLength": 4,
            "lengthEncoding": {
                "@type": "endpattern",
                "sentinel": "!"
            }
        }))?;
        assert!(matches!(
            schema,
            StringSchema::Utf8 {
                length: LengthEncoding::Fixed(4)
            }
        ));

        let (written, buffer) = encode_text(&schema, "Hans")?;
        assert_eq!(4, written);
        assert_eq!("Hans".as_bytes(), buffer.as_slice());

        assert!(encode_text(&schema, "Berta").is_err());

        Ok(())
    }

    #[test]
    fn incomplete_fixed() -> Result<()> {
        let schema = json!({
            "maxLength": 5,
            "lengthEncoding": { "@type": "fixed" }
        });
        assert!(from_value::<StringSchema>(schema).is_err());

        Ok(())
    }

    #[test]
    fn length() -> Result<()> {
        let schema: StringSchema = from_value(json!({
            "lengthEncoding": {
                "@type": "explicitlength",
                "length": 1
            }
        }))?;
        assert!(matches!(
            schema,
            StringSchema::Utf8 {
                length: LengthEncoding::LengthEncoded(_)
            }
        ));

        let (written, buffer) = encode_text(&schema, "Hans")?;
        assert_eq!(5, written);
        let expected: [u8; 5] = [4, b'H', b'a', b'n', b's'];
        assert_eq!(&expected, buffer.as_slice());

        Ok(())
    }

    #[test]
    fn simple_pattern() -> Result<()> {
        let schema: StringSchema = from_value(json!({
            "lengthEncoding": {
                "@type": "endpattern",
                "sentinel": "\0"
            }
        }))?;
        assert!(matches!(
            schema,
            StringSchema::Utf8 {
                length: LengthEncoding::EndPattern { .. }
            }
        ));

        let (written, buffer) = encode_text(&schema, "Hans")?;
        assert_eq!(5, written);
        let expected: [u8; 5] = [b'H', b'a', b'n', b's', 0x00];
        assert_eq!(&expected, buffer.as_slice());

        Ok(())
    }

    #[test]
    fn simple_pattern_binary() -> Result<()> {
        let schema: StringSchema = from_value(json!({
            "lengthEncoding": {
                "@type": "endpattern",
                "sentinel": "00"
            },
            "format": "binary",
        }))?;
        assert!(matches!(
            schema,
            StringSchema::Binary {
                inner: ArraySchema {
                    length: LengthEncoding::EndPattern { .. },
                    ..
                }
            }
        ));

        let (written, buffer) = encode_text(&schema, "6911dead")?;
        assert_eq!(5, written);
        let expected: [u8; 5] = [0x69, 0x11, 0xde, 0xad, 0x00];
        assert_eq!(&expected, buffer.as_slice());

        Ok(())
    }

    #[test]
    fn default() -> Result<()> {
        let schema: StringSchema = from_value(json!({}))?;
        assert!(matches!(
            schema,
            StringSchema::Utf8 {
                length: LengthEncoding::TillEnd
            }
        ));

        let (written, buffer) = encode_text(&schema, "Hans")?;
        assert_eq!(4, written);
        let expected: [u8; 4] = [b'H', b'a', b'n', b's'];
        assert_eq!(&expected, buffer.as_slice());

        Ok(())
    }

    #[test]
    fn invalid_pattern() -> Result<()> {
        // 'ß' takes two bytes in UTF-8 and is therefore no valid sentinel.
        let schema = json!({
            "lengthEncoding": {
                "@type": "endpattern",
                "sentinel": "ß"
            }
        });
        assert!(from_value::<StringSchema>(schema).is_err());

        Ok(())
    }

    #[test]
    fn other_pattern() -> Result<()> {
        let schema: StringSchema = from_value(json!({
            "lengthEncoding": {
                "@type": "endpattern",
                "sentinel": "!"
            }
        }))?;
        assert!(matches!(
            schema,
            StringSchema::Utf8 {
                length: LengthEncoding::EndPattern { .. }
            }
        ));

        let (written, buffer) = encode_text(&schema, "Hans")?;
        assert_eq!(5, written);
        let expected: [u8; 5] = [b'H', b'a', b'n', b's', b'!'];
        assert_eq!(&expected, buffer.as_slice());

        Ok(())
    }

    #[test]
    fn pattern_included() -> Result<()> {
        let schema: StringSchema = from_value(json!({
            "lengthEncoding": {
                "@type": "endpattern",
                "sentinel": "a"
            }
        }))?;
        assert!(matches!(
            schema,
            StringSchema::Utf8 {
                length: LengthEncoding::EndPattern { .. }
            }
        ));

        // "Hans" contains the sentinel 'a' and must be rejected.
        assert!(encode_text(&schema, "Hans").is_err());

        Ok(())
    }

    #[test]
    fn invalid_padding() -> Result<()> {
        // 'µ' takes two bytes in UTF-8 and is therefore no valid padding.
        let schema = json!({
            "lengthEncoding": {
                "@type": "capacity",
                "padding": "µ"
            },
            "maxLength": 10
        });
        assert!(from_value::<StringSchema>(schema).is_err());

        Ok(())
    }

    #[test]
    fn missing_capacity() -> Result<()> {
        let schema = json!({
            "lengthEncoding": {
                "@type": "capacity",
                "padding": "\0"
            }
        });
        assert!(from_value::<StringSchema>(schema).is_err());

        Ok(())
    }

    #[test]
    fn capacity() -> Result<()> {
        let schema: StringSchema = from_value(json!({
            "lengthEncoding": {
                "@type": "capacity",
                "padding": "!"
            },
            "maxLength": 10
        }))?;
        assert!(matches!(
            schema,
            StringSchema::Utf8 {
                length: LengthEncoding::Capacity { .. }
            }
        ));

        let (written, buffer) = encode_text(&schema, "Hans")?;
        assert_eq!(10, written);
        let expected: [u8; 10] = [b'H', b'a', b'n', b's', b'!', b'!', b'!', b'!', b'!', b'!'];
        assert_eq!(&expected, buffer.as_slice());

        Ok(())
    }

    #[test]
    fn binary_capacity() -> Result<()> {
        // Ten hex characters correspond to a capacity of five bytes.
        let schema: StringSchema = from_value(json!({
            "lengthEncoding": {
                "@type": "capacity",
                "padding": "00"
            },
            "maxLength": 10,
            "format": "binary"
        }))?;
        assert!(matches!(
            schema,
            StringSchema::Binary {
                inner: ArraySchema {
                    length: LengthEncoding::Capacity { capacity: 5, .. },
                    ..
                }
            }
        ));

        let (written, buffer) = encode_text(&schema, "6911dead")?;
        assert_eq!(5, written);
        let expected: [u8; 5] = [0x69, 0x11, 0xde, 0xad, 0x00];
        assert_eq!(&expected, buffer.as_slice());

        let mut read = std::io::Cursor::new(buffer);
        let decoded = schema.decode(&mut read)?;
        assert_eq!(json!("6911dead"), decoded);

        Ok(())
    }
}