scan_rules/scanner/
lang.rs

/*
Copyright ⓒ 2016 Daniel Keep.

Licensed under the MIT license (see LICENSE or <http://opensource.org
/licenses/MIT>) or the Apache License, Version 2.0 (see LICENSE or
<http://www.apache.org/licenses/LICENSE-2.0>), at your option. All
files in the project carrying such notice may not be copied, modified,
or distributed except according to those terms.
*/
10/*!
11Implementations of `ScanFromStr` for primitive language types.
12*/
13use itertools::Itertools;
14use strcursor::StrCursor;
15use ::ScanError;
16use ::input::ScanInput;
17use super::ScanFromStr;
18use super::misc::Word;
19
20parse_scanner! { impl<'a> for bool, from Word, err desc "expected `true` or `false`" }
21
#[cfg(test)]
#[test]
fn test_scan_bool() {
    use ::ScanError as SE;
    use ::ScanErrorKind as SEK;

    // Everything here must be rejected with a syntax error: empty input,
    // alternative spellings, leading whitespace, and capitalised keywords.
    let rejected = [
        "", "y", "n", "yes", "no", " ", " true", " false", "True", "False",
    ];
    for input in rejected.iter() {
        assert_match!(<bool>::scan_from(*input), Err(SE { kind: SEK::Syntax(_), .. }));
    }

    // The two keywords scan successfully and consume exactly their own length.
    assert_match!(<bool>::scan_from("true"), Ok((true, 4)));
    assert_match!(<bool>::scan_from("false"), Ok((false, 5)));
}
41
42impl<'a> ScanFromStr<'a> for char {
43    type Output = char;
44    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
45        let cur = try!(StrCursor::new_at_start(s.as_str()).at_next_cp()
46            .ok_or(ScanError::syntax("expected a character")));
47        Ok((cur.cp_before().unwrap(), cur.byte_pos()))
48    }
49}
50
#[cfg(test)]
#[test]
fn test_scan_char() {
    use ::ScanError as SE;
    use ::ScanErrorKind as SEK;

    assert_match!(<char>::scan_from(""), Err(SE { kind: SEK::Syntax(_), .. }));
    assert_match!(<char>::scan_from(" "), Ok((' ', 1)));
    assert_match!(<char>::scan_from("x"), Ok(('x', 1)));
    assert_match!(<char>::scan_from("xy"), Ok(('x', 1)));

    // The next two inputs render identically but are different code point
    // sequences.  They are spelled with escapes so that the distinction
    // survives editors and tools that normalise Unicode text.

    // Decomposed: 'e' followed by U+0301 COMBINING ACUTE ACCENT.  Only the
    // base letter (one byte) is scanned; the combining mark is left behind.
    assert_match!(<char>::scan_from("e\u{301}"), Ok(('e', 1)));
    // Precomposed: U+00E9, a single code point encoded as two bytes.
    assert_match!(<char>::scan_from("\u{e9}"), Ok(('\u{e9}', 2)));
    // U+5B57 (CJK ideograph), a single code point encoded as three bytes.
    assert_match!(<char>::scan_from("\u{5b57}"), Ok(('\u{5b57}', 3)));
}
65
66parse_scanner! { impl<'a> for f32, matcher match_float, matcher err "expected floating point number", err map ScanError::float }
67parse_scanner! { impl<'a> for f64, matcher match_float, matcher err "expected floating point number", err map ScanError::float }
68
/// Finds the longest prefix of `s` that looks like a floating point number.
///
/// Accepted forms, all anchored at byte 0 (no leading whitespace is skipped):
///
/// * the named constants `inf`, `-inf` and `NaN`, provided the character that
///   follows (if any) is not alphabetic;
/// * an optional `+`/`-`, one or more decimal digits, an optional `.` followed
///   by zero or more digits, and an optional exponent (`e`/`E`, optional sign,
///   one or more digits).
///
/// An exponent marker without digits is *not* part of the number: for input
/// such as `"1e"`, `"2em"` or `"1.5e+"` the match ends at the mantissa, in the
/// same way that `"0x"` matches just `"0"`.
///
/// Returns `None` if `s` does not start with at least one integer digit (after
/// the optional sign) or a named constant.  Otherwise returns `((0, n), n)`,
/// where `n` is the byte length of the matched prefix.
// NOTE(review): the tuple is presumably (byte range handed to the parser,
// bytes consumed); confirm against `parse_scanner!`.
fn match_float(s: &str) -> Option<((usize, usize), usize)> {
    // Number of ASCII decimal digits at the start of `bytes`.
    fn digit_run(bytes: &[u8]) -> usize {
        bytes.iter().take_while(|&&b| b >= b'0' && b <= b'9').count()
    }

    // Length (0 or 1) of a leading `+` or `-`.
    fn sign_len(bytes: &[u8]) -> usize {
        match bytes.first() {
            Some(&b'-') | Some(&b'+') => 1,
            _ => 0,
        }
    }

    // First, check for one of the named constants.  A constant directly
    // followed by a letter (e.g. `info`) is not treated as a constant.
    for &name in ["inf", "-inf", "NaN"].iter() {
        if s.starts_with(name) {
            let letter_follows = s[name.len()..].chars().next()
                .map(|c| c.is_alphabetic())
                .unwrap_or(false);
            if !letter_follows {
                return Some(((0, name.len()), name.len()));
            }
        }
    }

    // Everything from here on is ASCII, so work on bytes.
    let bytes = s.as_bytes();

    // Mantissa: optional sign, then a mandatory integer part.
    let int_start = sign_len(bytes);
    let int_digits = digit_run(&bytes[int_start..]);
    if int_digits == 0 {
        return None;
    }
    let mut end = int_start + int_digits;

    // There *might* be a decimal point, itself followed by zero or more digits.
    if bytes.get(end) == Some(&b'.') {
        end += 1;
        end += digit_run(&bytes[end..]);
    }

    // Finally, there *might* be an exponent.  It only counts if at least one
    // digit follows the marker (and optional sign); otherwise the marker is
    // left unconsumed and the match stops at the mantissa.
    match bytes.get(end) {
        Some(&b'e') | Some(&b'E') => {
            let exp_start = end + 1;
            let exp_digits_start = exp_start + sign_len(&bytes[exp_start..]);
            let exp_digits = digit_run(&bytes[exp_digits_start..]);
            if exp_digits > 0 {
                end = exp_digits_start + exp_digits;
            }
        }
        _ => (),
    }

    Some(((0, end), end))
}
148
#[cfg(test)]
#[test]
fn test_scan_f64() {
    use ::ScanError as SE;
    use ::ScanErrorKind as SEK;

    // For each literal, scans its source text (and the same text prefixed
    // with `-`) and expects the literal's value back with the whole text
    // consumed.
    macro_rules! check_f64 {
        ($($f:expr),+) => {$({
            let plain = stringify!($f);
            let negated = concat!("-", stringify!($f));
            assert_match!(<f64>::scan_from(plain), Ok(($f, n)) if n == plain.len());
            assert_match!(<f64>::scan_from(negated), Ok((-$f, n)) if n == negated.len());
        })+};
    }

    // Not a number at all, or a number that is not at the very start.
    for input in ["", "-", "+", "x", " ", " 0"].iter() {
        assert_match!(<f64>::scan_from(*input), Err(SE { kind: SEK::Syntax(_), .. }));
    }

    // Zero followed by various tails; the second field is the number of
    // bytes that should be consumed.
    for &(input, len) in [("0", 1), ("0x", 1), ("0.", 2), ("0.x", 2)].iter() {
        assert_match!(<f64>::scan_from(input), Ok((0.0, n)) if n == len);
    }

    // Named constants.
    assert_match!(<f64>::scan_from("inf"), Ok((f, 3)) if f == ::std::f64::INFINITY);
    assert_match!(<f64>::scan_from("-inf"), Ok((f, 4)) if f == ::std::f64::NEG_INFINITY);
    assert_match!(<f64>::scan_from("NaN"), Ok((v, 3)) if v.is_nan());

    check_f64!(
        0.0,
        1.0,
        0.1,
        12345.,
        0.12345,
        101e-33,
        1e23,
        2075e23,
        8713e-23,
        1e-325,
        1e-326,
        1e-500,
        1.448997445238699
    );
}
197
#[cfg(test)]
#[test]
fn test_scan_f64_debug_is_roundtrip_accurate() {
    // Scans the source text of a literal, and of its negation, and expects
    // the literal's own value back with every byte of the text consumed.
    macro_rules! check_f64 {
        ($f:expr) => {{
            let positive = stringify!($f);
            let negative = concat!("-", stringify!($f));
            assert_match!(<f64>::scan_from(positive), Ok(($f, n)) if n == positive.len());
            assert_match!(<f64>::scan_from(negative), Ok((-$f, n)) if n == negative.len());
        }};
    }

    // Ordinary magnitudes and large exponents.
    check_f64!(3e-5);
    check_f64!(12345.67890);
    check_f64!(1e300);
    check_f64!(123456789.34567e250);

    // Values with exponents around -308.
    check_f64!(2.2250738585072014e-308);
    check_f64!(2.22507385851e-308);
    check_f64!(2.1e-308);

    // Values with exponents in the -320s.
    check_f64!(5e-324);
    check_f64!(91e-324);
    check_f64!(1e-322);
    check_f64!(13245643e-320);
    check_f64!(4.9406564584124654e-324);
}
227
228parse_scanner! { impl<'a> for i8, matcher match_sinteger, matcher err "expected integer", err map ScanError::int }
229parse_scanner! { impl<'a> for i16, matcher match_sinteger, matcher err "expected integer", err map ScanError::int }
230parse_scanner! { impl<'a> for i32, matcher match_sinteger, matcher err "expected integer", err map ScanError::int }
231parse_scanner! { impl<'a> for i64, matcher match_sinteger, matcher err "expected integer", err map ScanError::int }
232parse_scanner! { impl<'a> for isize, matcher match_sinteger, matcher err "expected integer", err map ScanError::int }
233
234parse_scanner! { impl<'a> ScanFromBinary::scan_from_binary for i8, matcher match_bin_int, matcher err "expected binary integer", map |s| i8::from_str_radix(s, 2), err map ScanError::int }
235parse_scanner! { impl<'a> ScanFromBinary::scan_from_binary for i16, matcher match_bin_int, matcher err "expected binary integer", map |s| i16::from_str_radix(s, 2), err map ScanError::int }
236parse_scanner! { impl<'a> ScanFromBinary::scan_from_binary for i32, matcher match_bin_int, matcher err "expected binary integer", map |s| i32::from_str_radix(s, 2), err map ScanError::int }
237parse_scanner! { impl<'a> ScanFromBinary::scan_from_binary for i64, matcher match_bin_int, matcher err "expected binary integer", map |s| i64::from_str_radix(s, 2), err map ScanError::int }
238parse_scanner! { impl<'a> ScanFromBinary::scan_from_binary for isize, matcher match_bin_int, matcher err "expected binary integer", map |s| isize::from_str_radix(s, 2), err map ScanError::int }
239
240parse_scanner! { impl<'a> ScanFromOctal::scan_from_octal for i8, matcher match_oct_int, matcher err "expected octal integer", map |s| i8::from_str_radix(s, 8), err map ScanError::int }
241parse_scanner! { impl<'a> ScanFromOctal::scan_from_octal for i16, matcher match_oct_int, matcher err "expected octal integer", map |s| i16::from_str_radix(s, 8), err map ScanError::int }
242parse_scanner! { impl<'a> ScanFromOctal::scan_from_octal for i32, matcher match_oct_int, matcher err "expected octal integer", map |s| i32::from_str_radix(s, 8), err map ScanError::int }
243parse_scanner! { impl<'a> ScanFromOctal::scan_from_octal for i64, matcher match_oct_int, matcher err "expected octal integer", map |s| i64::from_str_radix(s, 8), err map ScanError::int }
244parse_scanner! { impl<'a> ScanFromOctal::scan_from_octal for isize, matcher match_oct_int, matcher err "expected octal integer", map |s| isize::from_str_radix(s, 8), err map ScanError::int }
245
246parse_scanner! { impl<'a> ScanFromHex::scan_from_hex for i8, matcher match_hex_int, matcher err "expected hex integer", map |s| i8::from_str_radix(s, 16), err map ScanError::int }
247parse_scanner! { impl<'a> ScanFromHex::scan_from_hex for i16, matcher match_hex_int, matcher err "expected hex integer", map |s| i16::from_str_radix(s, 16), err map ScanError::int }
248parse_scanner! { impl<'a> ScanFromHex::scan_from_hex for i32, matcher match_hex_int, matcher err "expected hex integer", map |s| i32::from_str_radix(s, 16), err map ScanError::int }
249parse_scanner! { impl<'a> ScanFromHex::scan_from_hex for i64, matcher match_hex_int, matcher err "expected hex integer", map |s| i64::from_str_radix(s, 16), err map ScanError::int }
250parse_scanner! { impl<'a> ScanFromHex::scan_from_hex for isize, matcher match_hex_int, matcher err "expected hex integer", map |s| isize::from_str_radix(s, 16), err map ScanError::int }
251
#[cfg(test)]
#[test]
fn test_scan_i32() {
    use ::ScanError as SE;
    use ::ScanErrorKind as SEK;

    // No digits at all, a bare sign, or digits that are not at the start.
    for input in ["", "-", "+", "x", " ", " 0"].iter() {
        assert_match!(<i32>::scan_from(*input), Err(SE { kind: SEK::Syntax(_), .. }));
    }

    // (input, expected value, expected bytes consumed).  Scanning stops at
    // the first non-digit, so `1_234` yields just the leading `1`.
    let accepted = [("0", 0, 1), ("42", 42, 2), ("-312", -312, 4), ("1_234", 1, 1)];
    for &(input, value, len) in accepted.iter() {
        assert_match!(<i32>::scan_from(input), Ok((v, n)) if v == value && n == len);
    }
}
269
270parse_scanner! { impl<'a> for u8, matcher match_uinteger, matcher err "expected integer", err map ScanError::int }
271parse_scanner! { impl<'a> for u16, matcher match_uinteger, matcher err "expected integer", err map ScanError::int }
272parse_scanner! { impl<'a> for u32, matcher match_uinteger, matcher err "expected integer", err map ScanError::int }
273parse_scanner! { impl<'a> for u64, matcher match_uinteger, matcher err "expected integer", err map ScanError::int }
274parse_scanner! { impl<'a> for usize, matcher match_uinteger, matcher err "expected integer", err map ScanError::int }
275
276parse_scanner! { impl<'a> ScanFromBinary::scan_from_binary for u8, matcher match_bin_int, matcher err "expected binary integer", map |s| u8::from_str_radix(s, 2), err map ScanError::int }
277parse_scanner! { impl<'a> ScanFromBinary::scan_from_binary for u16, matcher match_bin_int, matcher err "expected binary integer", map |s| u16::from_str_radix(s, 2), err map ScanError::int }
278parse_scanner! { impl<'a> ScanFromBinary::scan_from_binary for u32, matcher match_bin_int, matcher err "expected binary integer", map |s| u32::from_str_radix(s, 2), err map ScanError::int }
279parse_scanner! { impl<'a> ScanFromBinary::scan_from_binary for u64, matcher match_bin_int, matcher err "expected binary integer", map |s| u64::from_str_radix(s, 2), err map ScanError::int }
280parse_scanner! { impl<'a> ScanFromBinary::scan_from_binary for usize, matcher match_bin_int, matcher err "expected binary integer", map |s| usize::from_str_radix(s, 2), err map ScanError::int }
281
282parse_scanner! { impl<'a> ScanFromOctal::scan_from_octal for u8, matcher match_oct_int, matcher err "expected octal integer", map |s| u8::from_str_radix(s, 8), err map ScanError::int }
283parse_scanner! { impl<'a> ScanFromOctal::scan_from_octal for u16, matcher match_oct_int, matcher err "expected octal integer", map |s| u16::from_str_radix(s, 8), err map ScanError::int }
284parse_scanner! { impl<'a> ScanFromOctal::scan_from_octal for u32, matcher match_oct_int, matcher err "expected octal integer", map |s| u32::from_str_radix(s, 8), err map ScanError::int }
285parse_scanner! { impl<'a> ScanFromOctal::scan_from_octal for u64, matcher match_oct_int, matcher err "expected octal integer", map |s| u64::from_str_radix(s, 8), err map ScanError::int }
286parse_scanner! { impl<'a> ScanFromOctal::scan_from_octal for usize, matcher match_oct_int, matcher err "expected octal integer", map |s| usize::from_str_radix(s, 8), err map ScanError::int }
287
288parse_scanner! { impl<'a> ScanFromHex::scan_from_hex for u8, matcher match_hex_int, matcher err "expected hex integer", map |s| u8::from_str_radix(s, 16), err map ScanError::int }
289parse_scanner! { impl<'a> ScanFromHex::scan_from_hex for u16, matcher match_hex_int, matcher err "expected hex integer", map |s| u16::from_str_radix(s, 16), err map ScanError::int }
290parse_scanner! { impl<'a> ScanFromHex::scan_from_hex for u32, matcher match_hex_int, matcher err "expected hex integer", map |s| u32::from_str_radix(s, 16), err map ScanError::int }
291parse_scanner! { impl<'a> ScanFromHex::scan_from_hex for u64, matcher match_hex_int, matcher err "expected hex integer", map |s| u64::from_str_radix(s, 16), err map ScanError::int }
292parse_scanner! { impl<'a> ScanFromHex::scan_from_hex for usize, matcher match_hex_int, matcher err "expected hex integer", map |s| usize::from_str_radix(s, 16), err map ScanError::int }
293
#[cfg(test)]
#[test]
fn test_scan_u32() {
    use ::ScanError as SE;
    use ::ScanErrorKind as SEK;

    // No digits, a bare sign, digits not at the start, or a negative number.
    for input in ["", "-", "+", "x", " ", " 0", "-312"].iter() {
        assert_match!(<u32>::scan_from(*input), Err(SE { kind: SEK::Syntax(_), .. }));
    }

    // (input, expected value, expected bytes consumed).  Scanning stops at
    // the first non-digit, so `1_234` yields just the leading `1`.
    let accepted = [("0", 0, 1), ("42", 42, 2), ("1_234", 1, 1)];
    for &(input, value, len) in accepted.iter() {
        assert_match!(<u32>::scan_from(input), Ok((v, n)) if v == value && n == len);
    }
}
311
/// Matches the run of binary digits (`0`/`1`) at the start of `s`.
///
/// Returns `None` if `s` does not begin with a binary digit; otherwise
/// `((0, n), n)`, where `n` is the length of the run in bytes.
fn match_bin_int(s: &str) -> Option<((usize, usize), usize)> {
    let len = s.bytes().take_while(|&b| b == b'0' || b == b'1').count();
    if len == 0 {
        None
    } else {
        Some(((0, len), len))
    }
}
319
/// Matches the run of hexadecimal digits (`0-9`, `a-f`, `A-F`) at the start
/// of `s`.
///
/// Returns `None` if `s` does not begin with a hex digit; otherwise
/// `((0, n), n)`, where `n` is the length of the run in bytes.
fn match_hex_int(s: &str) -> Option<((usize, usize), usize)> {
    let is_hex_digit = |b: u8| {
        (b >= b'0' && b <= b'9') || (b >= b'a' && b <= b'f') || (b >= b'A' && b <= b'F')
    };

    // The run ends at the first byte that is not a hex digit, or at the end
    // of the input when every byte qualifies.
    let len = s.bytes().position(|b| !is_hex_digit(b)).unwrap_or(s.len());
    match len {
        0 => None,
        n => Some(((0, n), n)),
    }
}
328
/// Matches the run of octal digits (`0-7`) at the start of `s`.
///
/// Returns `None` if `s` does not begin with an octal digit; otherwise
/// `((0, n), n)`, where `n` is the length of the run in bytes.
fn match_oct_int(s: &str) -> Option<((usize, usize), usize)> {
    let mut len = 0;
    for b in s.bytes() {
        if b < b'0' || b > b'7' {
            break;
        }
        len += 1;
    }

    match len {
        0 => None,
        n => Some(((0, n), n)),
    }
}
336
/// Matches a signed decimal integer at the start of `s`: an optional `+` or
/// `-` followed by one or more ASCII digits.
///
/// Returns `None` if no digit follows the optional sign; otherwise
/// `((0, n), n)`, where `n` is the byte length of the sign plus digits.
fn match_sinteger(s: &str) -> Option<((usize, usize), usize)> {
    let bytes = s.as_bytes();

    let sign_len = match bytes.first() {
        Some(&b'-') | Some(&b'+') => 1,
        _ => 0,
    };

    let digits = bytes[sign_len..]
        .iter()
        .take_while(|&&b| b >= b'0' && b <= b'9')
        .count();
    if digits == 0 {
        return None;
    }

    let end = sign_len + digits;
    Some(((0, end), end))
}
350
/// Matches an unsigned decimal integer at the start of `s`: an optional `+`
/// followed by one or more ASCII digits.  A leading `-` is never accepted.
///
/// Returns `None` if no digit follows the optional `+`; otherwise
/// `((0, n), n)`, where `n` is the byte length of the sign plus digits.
fn match_uinteger(s: &str) -> Option<((usize, usize), usize)> {
    // Drop a single leading `+`, if there is one.
    let unsigned = if s.starts_with('+') { &s[1..] } else { s };

    let digits = unsigned
        .bytes()
        .take_while(|&b| b >= b'0' && b <= b'9')
        .count();
    if digits == 0 {
        return None;
    }

    // Re-add the length of the sign that was skipped above.
    let end = (s.len() - unsigned.len()) + digits;
    Some(((0, end), end))
}