bencodex/codec/
decode.rs

1use super::types::*;
2use num_bigint::BigInt;
3use num_traits::ToPrimitive;
4use std::collections::BTreeMap;
5use std::error::Error;
6use std::fmt;
7use std::result::Result;
8use std::str;
9use std::str::FromStr;
10
/// The error type returned when decoding a Bencodex value through [`Decode::decode`] fails.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DecodeError {
    /// Returned when decoding fails and no more specific variant applies (for example when the
    /// input ends too early). In the future it will be split into more specific variants.
    InvalidBencodexValueError,
    /// Returned when an unexpected token appears while decoding.
    ///
    /// `token` is the offending byte and `point` is its offset in the input.
    ///
    /// # Example
    ///
    /// For example, the encoded bytes of [`BencodexValue::Number`] have the form 'i{}e' (e.g., 'i0e', 'i2147483647e'). If the input does not satisfy that form, this error is returned inside [`Err`].
    ///
    /// ```
    /// use bencodex::{ Decode, DecodeError };
    ///
    /// //                     v -- should be b'0' ~ b'9' digit.
    /// let vec = vec![b'i', b':', b'e'];
    /// let error = vec.decode().unwrap_err();
    /// let expected_error = DecodeError::UnexpectedTokenError {
    ///     token: b':',
    ///     point: 1,
    /// };
    /// assert_eq!(expected_error, error);
    /// ```
    UnexpectedTokenError { token: u8, point: usize },
}

impl fmt::Display for DecodeError {
    /// Writes the same text as the derived [`fmt::Debug`] representation.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{:?}", self)
    }
}

impl Error for DecodeError {}
44
/// `Decode` is a trait to decode a [Bencodex] value.
///
/// [Bencodex]: https://bencodex.org/
pub trait Decode {
    /// Decodes this value as an encoded [Bencodex] value, consuming `self`.
    ///
    /// If decoding succeeds, the decoded value is returned inside [`Ok`].
    ///
    /// # Errors
    ///
    /// If decoding fails, a [`DecodeError`] is returned inside [`Err`].
    ///
    /// # Examples
    /// Basic usage with [`Vec<u8>`], the default implementor which implements `Decode`.
    /// ```
    /// use bencodex::{ Decode, BencodexValue };
    ///
    /// let vec = vec![b'n'];
    /// let null = vec.decode().unwrap();
    ///
    /// assert_eq!(BencodexValue::Null, null);
    /// ```
    /// [Bencodex]: https://bencodex.org/
    fn decode(self) -> Result<BencodexValue, DecodeError>;
}
66
67trait ShouldNotBeNone<T> {
68    fn should_not_be_none(self) -> Result<T, DecodeError>;
69}
70
71impl ShouldNotBeNone<u8> for Option<&u8> {
72    #[inline]
73    fn should_not_be_none(self) -> Result<u8, DecodeError> {
74        match self {
75            None => Err(DecodeError::InvalidBencodexValueError),
76            Some(v) => Ok(*v),
77        }
78    }
79}
80
81trait Expect<T> {
82    fn expect(self, expected: u8, point: usize) -> Result<(), DecodeError>;
83}
84
85impl Expect<u8> for u8 {
86    #[inline]
87    fn expect(self, expected: u8, point: usize) -> Result<(), DecodeError> {
88        if self != expected {
89            Err(DecodeError::UnexpectedTokenError { token: self, point })
90        } else {
91            Ok(())
92        }
93    }
94}
95
96fn decode_impl(vector: &[u8], start: usize) -> Result<(BencodexValue, usize), DecodeError> {
97    if start >= vector.len() {
98        return Err(DecodeError::InvalidBencodexValueError);
99    }
100
101    match vector[start] {
102        b'd' => decode_dict_impl(vector, start),
103        b'l' => decode_list_impl(vector, start),
104        b'u' => decode_unicode_string_impl(vector, start),
105        b'i' => decode_number_impl(vector, start),
106        b'0'..=b'9' => decode_byte_string_impl(vector, start),
107        b't' => Ok((BencodexValue::Boolean(true), 1)),
108        b'f' => Ok((BencodexValue::Boolean(false), 1)),
109        b'n' => Ok((BencodexValue::Null, 1)),
110        _ => Err(DecodeError::UnexpectedTokenError {
111            token: vector[start],
112            point: start,
113        }),
114    }
115}
116
117// start must be on 'd'
118fn decode_dict_impl(vector: &[u8], start: usize) -> Result<(BencodexValue, usize), DecodeError> {
119    vector
120        .get(start)
121        .should_not_be_none()?
122        .expect(b'd', start)?;
123
124    let mut tsize: usize = 1;
125    let mut index = start + tsize;
126    let mut map = BTreeMap::new();
127    while vector.get(index).should_not_be_none()? != b'e' {
128        let (value, size) = decode_impl(vector, index)?;
129        let key = match value {
130            BencodexValue::Text(s) => BencodexKey::Text(s),
131            BencodexValue::Binary(b) => BencodexKey::Binary(b),
132            _ => return Err(DecodeError::InvalidBencodexValueError),
133        };
134        tsize += size;
135        index = start + tsize;
136        let (value, size) = decode_impl(vector, index)?;
137
138        match map.insert(key, value) {
139            None => (),
140            Some(_) => todo!(),
141        };
142        tsize += size;
143        index = start + tsize;
144    }
145
146    vector
147        .get(index)
148        .should_not_be_none()?
149        .expect(b'e', index)?;
150    tsize += 1;
151
152    Ok((BencodexValue::Dictionary(map), tsize))
153}
154
155// start must be on 'l'
156fn decode_list_impl(vector: &[u8], start: usize) -> Result<(BencodexValue, usize), DecodeError> {
157    vector
158        .get(start)
159        .should_not_be_none()?
160        .expect(b'l', start)?;
161
162    let mut tsize: usize = 1;
163    let mut list = Vec::new();
164    let mut index = start + tsize;
165    while vector.get(index).should_not_be_none()? != b'e' {
166        let (value, size) = decode_impl(vector, index)?;
167        list.push(value);
168        tsize += size;
169        index = start + tsize
170    }
171
172    index = start + tsize;
173    vector
174        .get(index)
175        .should_not_be_none()?
176        .expect(b'e', index)?;
177    tsize += 1;
178
179    Ok((BencodexValue::List(list), tsize))
180}
181
182fn decode_byte_string_impl(
183    vector: &[u8],
184    start: usize,
185) -> Result<(BencodexValue, usize), DecodeError> {
186    let mut tsize: usize = 0;
187    let (length, size) = match read_number(&vector[start + tsize..]) {
188        None => return Err(DecodeError::InvalidBencodexValueError),
189        Some(v) => v,
190    };
191    tsize += size;
192
193    let index = start + tsize;
194    vector
195        .get(index)
196        .should_not_be_none()?
197        .expect(b':', index)?;
198    tsize += 1;
199    let length_size = length.to_usize().unwrap();
200    if vector.len() < start + tsize + length_size {
201        return Err(DecodeError::InvalidBencodexValueError);
202    }
203    Ok((
204        BencodexValue::Binary(vector[start + tsize..start + tsize + length_size].to_vec()),
205        tsize + length_size,
206    ))
207}
208
209// start must be on 'u'
210fn decode_unicode_string_impl(
211    vector: &[u8],
212    start: usize,
213) -> Result<(BencodexValue, usize), DecodeError> {
214    vector
215        .get(start)
216        .should_not_be_none()?
217        .expect(b'u', start)?;
218
219    let mut tsize: usize = 1;
220    if vector.len() < start + tsize + 1 {
221        return Err(DecodeError::InvalidBencodexValueError);
222    }
223    let (length, size) = match read_number(&vector[start + tsize..]) {
224        None => return Err(DecodeError::InvalidBencodexValueError),
225        Some(v) => v,
226    };
227    if length < BigInt::from(0) {
228        return Err(DecodeError::UnexpectedTokenError {
229            token: vector[start + tsize],
230            point: start + tsize,
231        });
232    }
233    tsize += size;
234
235    let index = start + tsize;
236    vector
237        .get(index)
238        .should_not_be_none()?
239        .expect(b':', index)?;
240    tsize += 1;
241
242    let length_size = length.to_usize().unwrap();
243    if vector.len() < start + tsize + length_size {
244        return Err(DecodeError::InvalidBencodexValueError);
245    }
246    let text = match str::from_utf8(&vector[start + tsize..start + tsize + length_size]) {
247        Ok(v) => v,
248        Err(_) => return Err(DecodeError::InvalidBencodexValueError),
249    };
250    tsize += length_size;
251    Ok((BencodexValue::Text(text.to_string()), tsize))
252}
253
254// start must be on 'i'
255fn decode_number_impl(vector: &[u8], start: usize) -> Result<(BencodexValue, usize), DecodeError> {
256    let mut tsize: usize = 1;
257    if vector.len() < start + tsize + 1 {
258        return Err(DecodeError::InvalidBencodexValueError);
259    }
260    let (number, size) = match read_number(&vector[start + tsize..]) {
261        None => {
262            return Err(DecodeError::UnexpectedTokenError {
263                token: vector[start + tsize],
264                point: start + tsize,
265            })
266        }
267        Some(v) => v,
268    };
269    tsize += size;
270
271    let index = start + tsize;
272    vector
273        .get(index)
274        .should_not_be_none()?
275        .expect(b'e', index)?;
276    tsize += 1;
277    Ok((BencodexValue::Number(number), tsize))
278}
279
280fn read_number(s: &[u8]) -> Option<(BigInt, usize)> {
281    if s.is_empty() {
282        return None;
283    }
284
285    let is_negative = s[0] == b'-';
286    if s.len() == 1 && is_negative {
287        return None;
288    }
289
290    let mut size: usize = is_negative as usize;
291    while size < s.len() {
292        match s[size] {
293            b'0'..=b'9' => {
294                size += 1;
295                continue;
296            }
297            _ => break,
298        };
299    }
300
301    if is_negative && size == 1 || size == 0 {
302        None
303    } else {
304        Some((
305            BigInt::from_str(&String::from_utf8(s[..size].to_vec()).unwrap()).unwrap(),
306            size,
307        ))
308    }
309}
310
311impl Decode for Vec<u8> {
312    /// ```
313    /// use bencodex::{ Decode, BencodexValue };
314    /// use std::collections::BTreeMap;
315    ///
316    /// let buf = b"de".to_vec();
317    /// let dictionary = buf.decode().ok().unwrap();
318    ///
319    /// assert_eq!(dictionary, BencodexValue::Dictionary(BTreeMap::new()));
320    /// ```
321    fn decode(self) -> Result<BencodexValue, DecodeError> {
322        Ok(decode_impl(&self, 0)?.0)
323    }
324}
325
326#[cfg(test)]
327mod tests {
328    mod decode_impl {
329        use super::super::*;
330
331        #[test]
332        fn should_return_error_with_overflowed_start() {
333            let expected_error = DecodeError::InvalidBencodexValueError;
334            assert_eq!(expected_error, decode_impl(&vec![], 1).unwrap_err());
335            assert_eq!(
336                expected_error,
337                decode_impl(&vec![b'1', b'2'], 2).unwrap_err()
338            );
339            assert_eq!(
340                expected_error,
341                decode_impl(&vec![b'1', b'2'], 20).unwrap_err()
342            );
343        }
344
345        #[test]
346        fn should_return_unexpected_token_error_with_invalid_source() {
347            assert_eq!(
348                DecodeError::UnexpectedTokenError {
349                    token: b'x',
350                    point: 0,
351                },
352                decode_impl(&vec![b'x'], 0).unwrap_err()
353            );
354            assert_eq!(
355                DecodeError::UnexpectedTokenError {
356                    token: b'k',
357                    point: 4,
358                },
359                decode_impl(&vec![b'x', b'y', b'z', b'o', b'k'], 4).unwrap_err()
360            );
361        }
362    }
363
364    mod decode_dict_impl {
365        use super::super::*;
366
367        #[test]
368        fn should_return_error_with_insufficient_length_source() {
369            let expected_error = DecodeError::InvalidBencodexValueError;
370            assert_eq!(
371                expected_error,
372                decode_dict_impl(&vec![b'd'], 0).unwrap_err()
373            );
374            assert_eq!(
375                expected_error,
376                decode_dict_impl(&vec![b'd'], 2).unwrap_err()
377            );
378            assert_eq!(expected_error, decode_dict_impl(&vec![], 0).unwrap_err());
379        }
380
381        #[test]
382        fn should_return_error_with_source_having_incorrect_key() {
383            let expected_error = DecodeError::InvalidBencodexValueError;
384            // { 0: null }
385            assert_eq!(
386                expected_error,
387                decode_dict_impl(&vec![b'd', b'i', b'0', b'e', b'n', b'e'], 0).unwrap_err()
388            );
389            // { null: null }
390            assert_eq!(
391                expected_error,
392                decode_dict_impl(&vec![b'd', b'n', b'n', b'e'], 0).unwrap_err()
393            );
394            // { list: null }
395            assert_eq!(
396                expected_error,
397                decode_dict_impl(&vec![b'd', b'l', b'e', b'n', b'e'], 0).unwrap_err()
398            );
399            // { dictionary: null }
400            assert_eq!(
401                expected_error,
402                decode_dict_impl(&vec![b'd', b'd', b'e', b'n', b'e'], 0).unwrap_err()
403            );
404            // { boolean: null }
405            assert_eq!(
406                expected_error,
407                decode_dict_impl(&vec![b'd', b't', b'e', b'n', b'e'], 0).unwrap_err()
408            );
409        }
410
411        #[test]
412        fn should_pass_error() {
413            assert_eq!(
414                DecodeError::UnexpectedTokenError {
415                    token: b'k',
416                    point: 1,
417                },
418                decode_dict_impl(&vec![b'd', b'k', b'n', b'e'], 0).unwrap_err()
419            );
420            assert_eq!(
421                DecodeError::UnexpectedTokenError {
422                    token: b'k',
423                    point: 4,
424                },
425                decode_dict_impl(&vec![b'd', b'1', b':', b'a', b'k', b'e'], 0).unwrap_err()
426            );
427        }
428    }
429
430    mod decode_list_impl {
431        use super::super::*;
432
433        #[test]
434        fn should_return_error_with_insufficient_length_source() {
435            let expected_error = DecodeError::InvalidBencodexValueError;
436            assert_eq!(
437                expected_error,
438                decode_list_impl(&vec![b'l'], 0).unwrap_err()
439            );
440            assert_eq!(
441                expected_error,
442                decode_list_impl(&vec![b'l'], 2).unwrap_err()
443            );
444            assert_eq!(expected_error, decode_list_impl(&vec![], 0).unwrap_err());
445        }
446
447        #[test]
448        fn should_pass_error() {
449            assert_eq!(
450                DecodeError::UnexpectedTokenError {
451                    token: b'k',
452                    point: 1,
453                },
454                decode_list_impl(&vec![b'l', b'k', b'e'], 0).unwrap_err()
455            );
456        }
457    }
458
459    mod decode_byte_string_impl {
460        use super::super::*;
461
462        #[test]
463        fn should_return_error_with_insufficient_length_source() {
464            let expected_error = DecodeError::InvalidBencodexValueError;
465            assert_eq!(
466                expected_error,
467                decode_byte_string_impl(&vec![b'1'], 0).unwrap_err()
468            );
469            assert_eq!(
470                expected_error,
471                decode_byte_string_impl(&vec![b'1', b':'], 0).unwrap_err()
472            );
473            assert_eq!(
474                expected_error,
475                decode_byte_string_impl(&vec![b'2', b':', b'a'], 0).unwrap_err()
476            );
477            assert_eq!(
478                expected_error,
479                decode_byte_string_impl(&vec![], 0).unwrap_err()
480            );
481        }
482
483        #[test]
484        fn should_return_unexpected_token_error_with_invalid_source() {
485            assert_eq!(
486                DecodeError::UnexpectedTokenError {
487                    token: b'k',
488                    point: 1,
489                },
490                decode_byte_string_impl(&vec![b'1', b'k', b'a'], 0).unwrap_err()
491            );
492        }
493    }
494
495    mod decode_unicode_string_impl {
496        use super::super::*;
497
498        #[test]
499        fn should_return_error_with_insufficient_length_source() {
500            let expected_error = DecodeError::InvalidBencodexValueError;
501            assert_eq!(
502                expected_error,
503                decode_unicode_string_impl(&vec![b'u'], 0).unwrap_err()
504            );
505            assert_eq!(
506                expected_error,
507                decode_unicode_string_impl(&vec![b'u', b'1'], 0).unwrap_err()
508            );
509            assert_eq!(
510                expected_error,
511                decode_unicode_string_impl(&vec![b'u', b'2', b':', b'a'], 0).unwrap_err()
512            );
513            assert_eq!(
514                expected_error,
515                decode_unicode_string_impl(&vec![b'u', b'k'], 0).unwrap_err()
516            );
517            assert_eq!(
518                expected_error,
519                decode_unicode_string_impl(&vec![], 0).unwrap_err()
520            );
521        }
522
523        #[test]
524        fn should_return_unexpected_token_error_with_invalid_source() {
525            assert_eq!(
526                DecodeError::UnexpectedTokenError {
527                    token: b'k',
528                    point: 2
529                },
530                decode_unicode_string_impl(&vec![b'u', b'1', b'k', b'a'], 0).unwrap_err()
531            );
532        }
533
534        #[test]
535        fn should_return_unexpected_token_error_with_negative_length_number() {
536            assert_eq!(
537                DecodeError::UnexpectedTokenError {
538                    token: b'-',
539                    point: 1,
540                },
541                decode_unicode_string_impl(&vec![b'u', b'-', b'1', b':', b'a'], 0).unwrap_err()
542            );
543        }
544
545        #[test]
546        fn should_return_error_with_invalid_source_having_invalid_unicode_string() {
547            assert_eq!(
548                DecodeError::InvalidBencodexValueError,
549                decode_unicode_string_impl(&vec![b'u', b'1', b':', 0x90], 0).unwrap_err()
550            );
551        }
552    }
553
554    mod decode_number_impl {
555        use super::super::*;
556
557        #[test]
558        fn should_return_error_with_insufficient_length_source() {
559            let expected_error = DecodeError::InvalidBencodexValueError;
560            assert_eq!(
561                expected_error,
562                decode_number_impl(&vec![b'i'], 0).unwrap_err()
563            );
564            assert_eq!(
565                expected_error,
566                decode_number_impl(&vec![b'i', b'2'], 0).unwrap_err()
567            );
568            assert_eq!(
569                expected_error,
570                decode_number_impl(&vec![b'i', b'-', b'2'], 0).unwrap_err()
571            );
572            assert_eq!(expected_error, decode_number_impl(&vec![], 0).unwrap_err());
573        }
574
575        #[test]
576        fn should_return_unexpected_token_error_with_invalid_source() {
577            assert_eq!(
578                DecodeError::UnexpectedTokenError {
579                    token: b'a',
580                    point: 1,
581                },
582                decode_number_impl(&vec![b'i', b'a', b'a'], 0).unwrap_err()
583            );
584            assert_eq!(
585                DecodeError::UnexpectedTokenError {
586                    token: b'a',
587                    point: 2,
588                },
589                decode_number_impl(&vec![b'i', b'1', b'a'], 0).unwrap_err()
590            );
591        }
592    }
593
594    mod vec_u8 {
595        mod decode_impl {
596            mod decode {
597                use super::super::super::super::*;
598
599                #[test]
600                fn should_pass_error() {
601                    assert_eq!(
602                        DecodeError::InvalidBencodexValueError,
603                        vec![].decode().unwrap_err()
604                    );
605                    assert_eq!(
606                        DecodeError::UnexpectedTokenError {
607                            token: b'_',
608                            point: 0,
609                        },
610                        vec![b'_'].decode().unwrap_err()
611                    );
612                }
613            }
614        }
615    }
616
617    mod u8 {
618        mod expect_impl {
619            mod expect {
620                use super::super::super::super::{DecodeError, Expect};
621
622                #[test]
623                fn should_return_unexpected_token_error() {
624                    let decode_error = b'a'.expect(b'u', 12).unwrap_err();
625                    if let DecodeError::UnexpectedTokenError { token, point } = decode_error {
626                        assert_eq!(b'a', token);
627                        assert_eq!(12, point);
628                    }
629
630                    let decode_error = b'x'.expect(b'u', 100).unwrap_err();
631                    if let DecodeError::UnexpectedTokenError { token, point } = decode_error {
632                        assert_eq!(b'x', token);
633                        assert_eq!(100, point);
634                    }
635                }
636            }
637        }
638    }
639
640    mod decode_error {
641        mod display_impl {
642            use super::super::super::*;
643
644            #[test]
645            fn fmt() {
646                assert_eq!(
647                    "InvalidBencodexValueError",
648                    DecodeError::InvalidBencodexValueError.to_string()
649                )
650            }
651        }
652    }
653
654    mod read_number {
655        use super::super::*;
656
657        #[test]
658        fn should_return_none() {
659            assert_eq!(None, read_number(b""));
660        }
661
662        #[test]
663        fn should_return_ok_with_positive() {
664            assert_eq!(Some((BigInt::from(1), 1)), read_number(b"1"));
665            assert_eq!(Some((BigInt::from(326), 3)), read_number(b"326"));
666        }
667
668        #[test]
669        fn should_return_ok_with_negative() {
670            assert_eq!(Some((BigInt::from(-1), 2)), read_number(b"-1"));
671            assert_eq!(Some((BigInt::from(-845), 4)), read_number(b"-845"));
672        }
673
674        #[test]
675        fn should_return_none_with_single_minus_sign() {
676            assert_eq!(None, read_number(b"-"));
677        }
678
679        #[test]
680        fn should_return_none_with_single_minus_sign_and_invalid_char() {
681            assert_eq!(None, read_number(b"-e"));
682            assert_eq!(None, read_number(b"-x"));
683        }
684    }
685}