parse/
parse.rs

1use std;
2
3pub fn literal<'a, T: AsRef<[u8]>>(src: &mut &'a[u8], what: T) -> ::Result<()> {
4    let what = what.as_ref();
5    if !src.starts_with(what) {
6        return Err("invalid literal")
7    }
8    *src = &src[what.len()..];
9    Ok(())
10}
11
#[test]
fn test_literal() {
    let mut rest: &[u8] = b"tuonen joutsen 123%&#";

    literal(&mut rest, "tuonen").unwrap();
    assert_eq!(rest, &b" joutsen 123%&#"[..]);

    // The empty literal matches anywhere and consumes nothing.
    literal(&mut rest, "").unwrap();
    literal(&mut rest, " joutsen 123%").unwrap();
    assert_eq!(rest, &b"&#"[..]);

    // A wrong byte, or a literal longer than the remaining input, fails.
    for wrong in ["k", "&#o"].iter() {
        assert!(literal(&mut rest, *wrong).is_err());
    }

    literal(&mut rest, "&#").unwrap();
    assert!(rest.is_empty());
}
25
26pub fn whitespace_if_any<'a>(src: &mut &'a[u8]) {
27    let mut i = 0;
28    while i < src.len() && is_whitespace(src[i]) {
29        i += 1;
30    }
31    *src = &src[i..];
32}
33
34// requires at least one whitespace character.
35pub fn whitespace<'a>(src: &mut &'a[u8]) -> ::Result<()> {
36    if src.len() < 1 || !is_whitespace(src[0]) {
37        return Err("no whitespace")
38    }
39    *src = &src[1..];
40    Ok(whitespace_if_any(src))
41}
42
#[test]
fn test_whitespace() {
    let mut rest: &[u8] = b"\n\t\r\n yyy \n\r\t\nzzz";

    whitespace(&mut rest).unwrap();
    assert_eq!(rest, &b"yyy \n\r\t\nzzz"[..]);

    literal(&mut rest, "yyy").unwrap();

    // The first pass strips the separator; the second must be a no-op.
    for _ in 0..2 {
        whitespace_if_any(&mut rest);
        assert_eq!(rest, &b"zzz"[..]);
    }
}
54
55macro_rules! uint {
56    ($T:ident) => {
57        pub fn $T(src: &mut &[u8]) -> ::Result<$T> {
58            let mut v = 0 as $T;
59            let mut i = 0;
60            let q = std::$T::MAX / 10;
61            let e = std::$T::MAX % 10;
62            while i < src.len() && is_digit(src[i]) {
63                let number = (src[i] - b'0') as $T;
64                if v > q || (v == q && number > e) {
65                    return Err("invalid uint: overflow")
66                }
67                v = v * 10 + number;
68                i += 1;
69            }
70            if i == 0 {
71                return Err("invalid uint")
72            }
73            *src = &src[i..];
74            Ok(v)
75        }
76    }
77}
78uint!(u8);
79uint!(u16);
80uint!(u32);
81uint!(u64);
82
#[test]
fn test_uint() {
    // A successful parse stops at the first non-digit and leaves the tail.
    let mut rest: &[u8] = b"1k";
    assert_eq!(u8(&mut rest).unwrap(), 1u8);
    assert_eq!(rest, &b"k"[..]);

    // Leading zeros, zero itself and the largest value of every width.
    for &(text, expected) in [("01", 1u8), ("0", 0), ("255", 255)].iter() {
        assert_eq!(u8(&mut text.as_bytes()).unwrap(), expected);
    }
    for &(text, expected) in [("256", 256u16), ("65535", 65535)].iter() {
        assert_eq!(u16(&mut text.as_bytes()).unwrap(), expected);
    }
    assert_eq!(u32(&mut "4294967295".as_bytes()).unwrap(), 4294967295_u32);
    assert_eq!(u64(&mut "18446744073709551615".as_bytes()).unwrap(), 18446744073709551615_u64);

    // One past the maximum, and one digit too many.
    for text in ["256", "1000"].iter() {
        assert!(u8(&mut text.as_bytes()).is_err());
    }
    for text in ["65536", "100000"].iter() {
        assert!(u16(&mut text.as_bytes()).is_err());
    }
    for text in ["4294967296", "10000000000"].iter() {
        assert!(u32(&mut text.as_bytes()).is_err());
    }
    for text in ["18446744073709551616", "100000000000000000000"].iter() {
        assert!(u64(&mut text.as_bytes()).is_err());
    }

    // Nothing that starts with a digit.
    for text in ["", "-255", "q"].iter() {
        assert!(u32(&mut text.as_bytes()).is_err());
    }
}
107
108macro_rules! int {
109    ($T:ident) => {
110        pub fn $T(src: &mut &[u8]) -> ::Result<$T> {
111            if src.len() == 0 {
112                return Err("invalid int: nothing to parse")
113            }
114
115            let mut i = 0;
116            let digit_offset;
117            let positive = src[0] != b'-';
118            if src[0] == b'-' || src[0] == b'+' {
119                i += 1;
120                digit_offset = 1;
121            } else {
122                digit_offset = 0;
123            }
124            let mut v = 0 as $T;
125            let q = if positive { std::$T::MAX / 10 } else { -(std::$T::MIN / 10) };
126            let e = if positive { std::$T::MAX % 10 } else { -(std::$T::MIN % 10) };
127
128            while i < src.len() && is_digit(src[i]) {
129                let number = (src[i] - b'0') as $T;
130                if v > q || (v == q && number > e) {
131                    return Err("invalid int: overflow")
132                }
133                v = (v * 10).wrapping_add(number);    // let MAX+1 wrap to MIN
134                i += 1;
135            }
136            if i <= digit_offset {
137                return Err("invalid int")
138            }
139            *src = &src[i..];
140            Ok(if positive { v } else { v.wrapping_neg() })     // -MIN == MIN
141        }
142    }
143}
144int!(i8);
145int!(i16);
146int!(i32);
147int!(i64);
148
#[test]
fn test_int() {
    // A successful parse stops at the first non-digit and leaves the tail.
    let mut rest: &[u8] = b"1k";
    assert_eq!(i8(&mut rest).unwrap(), 1i8);
    assert_eq!(rest, &b"k"[..]);

    // Leading zeros, explicit signs and both extremes of the narrowest type.
    let small = [("01", 1i8), ("0", 0), ("+1", 1), ("-8", -8), ("-128", -128), ("127", 127)];
    for &(text, expected) in small.iter() {
        assert_eq!(i8(&mut text.as_bytes()).unwrap(), expected);
    }

    // Both extremes of the wider types.
    assert_eq!(i16(&mut "-32768".as_bytes()).unwrap(), -32768_i16);
    assert_eq!(i16(&mut "32767".as_bytes()).unwrap(), 32767_i16);
    assert_eq!(i32(&mut "-2147483648".as_bytes()).unwrap(), -2147483648_i32);
    assert_eq!(i32(&mut "2147483647".as_bytes()).unwrap(), 2147483647_i32);
    assert_eq!(i64(&mut "-9223372036854775808".as_bytes()).unwrap(), -9223372036854775808_i64);
    assert_eq!(i64(&mut "9223372036854775807".as_bytes()).unwrap(), 9223372036854775807_i64);

    // One past each extreme, and one digit too many in either direction.
    for text in ["-129", "-1000", "128", "1000"].iter() {
        assert!(i8(&mut text.as_bytes()).is_err());
    }
    for text in ["-32769", "-100000", "32768", "100000"].iter() {
        assert!(i16(&mut text.as_bytes()).is_err());
    }
    for text in ["-2147483649", "-10000000000", "2147483648", "10000000000"].iter() {
        assert!(i32(&mut text.as_bytes()).is_err());
    }
    let too_wide = [
        "-9223372036854775809",
        "-10000000000000000000",
        "9223372036854775808",
        "10000000000000000000",
    ];
    for text in too_wide.iter() {
        assert!(i64(&mut text.as_bytes()).is_err());
    }

    // Empty input, a bare sign and a non-digit.
    for text in ["", "-", "+", "q"].iter() {
        assert!(i32(&mut text.as_bytes()).is_err());
    }
}
190
191pub fn f32(src: &mut &[u8]) -> ::Result<f32> {
192    use std::str::{self, FromStr};
193
194    if src.len() == 0 {
195        return Err("invalid float: nothing to parse")
196    }
197
198    let mut i = 0;
199    if src[i] == b'-' || src[i] == b'+' {
200        i += 1;
201    }
202
203    while i < src.len() && is_digit(src[i]) {
204        i += 1;
205    }
206
207    if i < src.len() && src[i] == b'.' {
208        i += 1;
209        while i < src.len() && is_digit(src[i]) {
210            i += 1;
211        }
212    }
213
214    if i + 1 < src.len() && src[i] == b'e' {
215        if is_digit(src[i+1]) {
216            i += 2;
217        } else if i + 2 < src.len() && src[i+1] == b'-' && is_digit(src[i+2]) {
218            i += 3;
219        } else {
220            return Err("invalid float")
221        }
222        while i < src.len() && is_digit(src[i]) {
223            i += 1;
224        }
225    }
226
227    let s = unsafe { str::from_utf8_unchecked(&src[..i]) };
228    if let Ok(v) = FromStr::from_str(s) {
229        *src = &src[i..];
230        return Ok(v)
231    }
232    Err("invalid float")
233}
234
#[test]
fn test_f32() {
    // (expected value, input text)
    let cases: [(f32, &str); 9] = [
        (32.,  "+32"),
        (-32., "-32"),
        (32.,  "32"),
        (32e2, "32e2"),
        (32.,  "32."),
        (32e2, "32.e2"),
        (32.,  "32.0"),
        (32e2, "32.0e2"),
        (0.32, "32.0e-2"),
    ];
    for &(expected, text) in cases.iter() {
        assert_eq!(expected, f32(&mut text.as_bytes()).unwrap());
    }
}
247
248pub fn bool(src: &mut &[u8]) -> ::Result<bool> {
249    if src.starts_with(b"true") {
250        *src = &src[4..];
251        return Ok(true)
252    } else if src.starts_with(b"false") {
253        *src = &src[5..];
254        return Ok(false)
255    }
256    Err("invalid bool")
257}
258
#[test]
fn test_bool() {
    assert!(bool(&mut "true".as_bytes()).unwrap());
    assert!(!bool(&mut "false".as_bytes()).unwrap());
    // NOTE: only the prefix is matched, so trailing bytes do not make the parse fail.
    assert!(!bool(&mut "falsee".as_bytes()).unwrap());

    for text in ["tru", "trudat", "fals"].iter() {
        assert!(bool(&mut text.as_bytes()).is_err());
    }
}
268
269macro_rules! hex {
270    ($T:ident, $name:ident) => {
271        pub fn $name(src: &mut &[u8]) -> ::Result<$T> {
272            let mut v = 0 as $T;
273            let mut i = 0;
274            let q = std::$T::MAX / 16;  // note that MAX % 16 == 15, for all the types.
275            let mut number = 0;
276            while i < src.len() && hex_digit_to_value(src[i], &mut number) {
277                if v > q {
278                    return Err("invalid hex: overflow")
279                }
280                v = v * 16 + number as $T;
281                i += 1;
282            }
283            if i == 0 {
284                return Err("invalid hex")
285            }
286            *src = &src[i..];
287            Ok(v)
288        }
289    }
290}
291hex!(u8, hex_u8);
292hex!(u16, hex_u16);
293hex!(u32, hex_u32);
294hex!(u64, hex_u64);
295
#[test]
fn test_hex() {
    // A successful parse stops at the first non-hex byte and leaves the tail.
    let mut rest: &[u8] = b"1k";
    assert_eq!(hex_u8(&mut rest).unwrap(), 1u8);
    assert_eq!(rest, &b"k"[..]);

    for &(text, expected) in [("01", 1u8), ("a", 10), ("10", 16), ("ff", 255)].iter() {
        assert_eq!(hex_u8(&mut text.as_bytes()).unwrap(), expected);
    }
    for &(text, expected) in [("100", 256u16), ("ffff", 65535)].iter() {
        assert_eq!(hex_u16(&mut text.as_bytes()).unwrap(), expected);
    }
    // Both letter cases are accepted.
    for text in ["ffffffff", "FFFFFFFF"].iter() {
        assert_eq!(hex_u32(&mut text.as_bytes()).unwrap(), 0xffff_ffff_u32);
    }
    assert_eq!(hex_u64(&mut "ffffffffffffffff".as_bytes()).unwrap(), 0xffff_ffff_ffff_ffff_u64);

    // One digit more than each type can hold.
    assert!(hex_u8(&mut "100".as_bytes()).is_err());
    assert!(hex_u16(&mut "10000".as_bytes()).is_err());
    assert!(hex_u32(&mut "100000000".as_bytes()).is_err());
    assert!(hex_u64(&mut "10000000000000000".as_bytes()).is_err());
}
315
316// requires `src` to have two digits for each byte.
317pub fn hex_string<'a>(src: &mut &[u8], dst: &'a mut[u8]) -> ::Result<()> {
318    if src.len() < 2 * dst.len() {
319        return Err("not enough digits for hex string")
320    }
321    let mut high = 0;
322    let mut low = 0;
323    let mut i = 0;
324    for d in &mut dst[..] {
325        if !hex_digit_to_value(src[i], &mut high)
326        || !hex_digit_to_value(src[i+1], &mut low) {
327            return Err("invalid hex string")
328        }
329        *d = high * 16 + low;
330        i += 2;
331    }
332    *src = &src[i..];
333    Ok(())
334}
335
#[test]
fn test_hex_string() {
    let expected = b"\xAA\xbb\x70\xb7\xe7\x52\xfc\xc3\xf5\x1d\
                     \xec\xbc\x65\x62\x70\x83\x4e\xd4\x5a\x39";
    let mut hash = [0u8; 20];

    // Forty digits fill the twenty-byte buffer; the rest of the line stays.
    let mut rest: &[u8] = b"AAbb70b7e752fcc3f51decbc656270834ed45a39 readme.txt";
    hex_string(&mut rest, &mut hash[..]).unwrap();
    assert_eq!(&hash[..], &expected[..]);
    assert_eq!(rest, &b" readme.txt"[..]);

    // Two digits short of what the buffer needs.
    let mut short: &[u8] = b"bb70b7e752fcc3f51decbc656270834ed45a39";
    assert!(hex_string(&mut short, &mut hash[..]).is_err());
}
348
/// Returns `true` for space, horizontal tab, line feed and carriage return.
#[inline]
pub fn is_whitespace(c: u8) -> bool {
    match c {
        b' ' | b'\t' | b'\n' | b'\r' => true,
        _ => false,
    }
}
353
/// Returns `true` for the ASCII decimal digits `0` through `9`.
#[inline]
pub fn is_digit(c: u8) -> bool {
    // Bytes below b'0' wrap around to large values, so a single comparison
    // covers both ends of the range.
    c.wrapping_sub(b'0') <= 9
}
358
/// Converts one ASCII hexadecimal digit (either case) to its value.
///
/// Writes the value (0 to 15) to `out` and returns `true` when `digit` is a
/// hex digit; otherwise returns `false` and leaves `out` unchanged.
pub fn hex_digit_to_value(digit: u8, out: &mut u8) -> bool {
    let value = match digit {
        d if b'0' <= d && d <= b'9' => d - b'0',
        d if b'a' <= d && d <= b'f' => d - b'a' + 10,
        d if b'A' <= d && d <= b'F' => d - b'A' + 10,
        _ => return false,
    };
    *out = value;
    true
}
371
372/// Parse binary data.
373pub mod bin {
374    use std::mem;
375    macro_rules! int_be {
376        ($T:ident, $name:ident) => {
377            pub fn $name(src: &mut &[u8]) -> ::Result<$T> {
378                if src.len() < mem::size_of::<$T>() {
379                    return Err("can't parse integer")
380                }
381                let res = $T::from_be(*unsafe { mem::transmute::<&u8, &$T>(&src[0]) });
382                *src = &src[mem::size_of::<$T>()..];
383                Ok(res)
384            }
385        }
386    }
387    int_be!(u8, u8);
388    int_be!(i8, i8);
389    int_be!(u16, u16_be);
390    int_be!(i16, i16_be);
391    int_be!(u32, u32_be);
392    int_be!(i32, i32_be);
393    int_be!(u64, u64_be);
394    int_be!(i64, i64_be);
395
396    macro_rules! int_le {
397        ($T:ident, $name:ident) => {
398            pub fn $name(src: &mut &[u8]) -> ::Result<$T> {
399                if src.len() < mem::size_of::<$T>() {
400                    return Err("can't parse integer")
401                }
402                let res = $T::from_le(*unsafe { mem::transmute::<&u8, &$T>(&src[0]) });
403                *src = &src[mem::size_of::<$T>()..];
404                Ok(res)
405            }
406        }
407    }
408    int_le!(u16, u16_le);
409    int_le!(i16, i16_le);
410    int_le!(u32, u32_le);
411    int_le!(i32, i32_le);
412    int_le!(u64, u64_le);
413    int_le!(i64, i64_le);
414
415    #[test]
416    fn test_bin() {
417        let src = [0xff, 0x01];
418        assert_eq!(0xff01, u16_be(&mut &src[..]).unwrap());
419        assert_eq!(0x01ff, u16_le(&mut &src[..]).unwrap());
420    }
421}