scan_rules/scanner/
runtime.rs

1/*
2Copyright ⓒ 2016 Daniel Keep.
3
4Licensed under the MIT license (see LICENSE or <http://opensource.org
5/licenses/MIT>) or the Apache License, Version 2.0 (see LICENSE of
6<http://www.apache.org/licenses/LICENSE-2.0>), at your option. All
7files in the project carrying such notice may not be copied, modified,
8or distributed except according to those terms.
9*/
10/*!
11Types and constructors for various runtime scanners.
12*/
13use std::marker::PhantomData;
14use strcursor::StrCursor;
15use ::ScanError;
16use ::input::ScanInput;
17use ::scanner::{ScanFromStr, ScanStr};
18
19#[cfg(feature="regex")] use regex::Regex;
20
/**
Creates a runtime scanner that forces *exactly* `width` bytes to be consumed.

This is done in two steps: first, it truncates the input provided to the inner scanner `then` to exactly `width` bytes.  Secondly, it verifies that the inner scanner consumed all of the truncated input.

Scanning fails with a syntax error if fewer than `width` bytes of input are available, or if the inner scanner reports consuming a number of bytes other than `width`.

See: [`exact_width_a`](fn.exact_width_a.html).
*/
pub fn exact_width<Then>(width: usize, then: Then) -> ExactWidth<Then> {
    ExactWidth(width, then)
}
31
32/**
33Creates a runtime scanner that forces *exactly* `width` bytes to be consumed by the static scanner `S`.
34
35See: [`exact_width`](fn.exact_width.html).
36*/
37pub fn exact_width_a<S>(width: usize) -> ExactWidth<ScanA<S>> {
38    exact_width(width, scan_a::<S>())
39}
40
/**
Runtime scanner that forces *exactly* `width` bytes to be consumed.

Field `0` is the required width in bytes; field `1` is the inner scanner that is handed the truncated input.

See: [`exact_width`](fn.exact_width.html), [`exact_width_a`](fn.exact_width_a.html).
*/
pub struct ExactWidth<Then>(usize, Then);
47
48impl<'a, Then> ScanStr<'a> for ExactWidth<Then>
49where Then: ScanStr<'a> {
50    type Output = Then::Output;
51
52    fn scan<I: ScanInput<'a>>(&mut self, s: I) -> Result<(Self::Output, usize), ScanError> {
53        let s_str = s.as_str();
54        if s_str.len() < self.0 {
55            return Err(ScanError::syntax("input not long enough"));
56        }
57
58        let sl = s.from_subslice(&s_str[..self.0]);
59
60        match self.1.scan(sl) {
61            Ok((_, n)) if n != self.0 => Err(ScanError::syntax("value did not consume enough characters")),
62            Err(err) => Err(err),
63            Ok((v, _)) => Ok((v, self.0))
64        }
65    }
66
67    fn wants_leading_junk_stripped(&self) -> bool {
68        self.1.wants_leading_junk_stripped()
69    }
70}
71
72#[cfg(test)]
73#[test]
74fn test_exact_width() {
75    use ::ScanError as SE;
76    use ::ScanErrorKind as SEK;
77    use ::scanner::Word;
78    let scan = exact_width_a::<Word>;
79
80    assert_match!(scan(2).scan(""), Err(SE { kind: SEK::Syntax(_), .. }));
81    assert_match!(scan(2).scan("a"), Err(SE { kind: SEK::Syntax(_), .. }));
82    assert_match!(scan(2).scan("a b"), Err(SE { kind: SEK::Syntax(_), .. }));
83    assert_match!(scan(2).scan("ab"), Ok(("ab", 2)));
84    assert_match!(scan(2).scan("abc"), Ok(("ab", 2)));
85}
86
/**
Creates a runtime scanner that forces *at most* `width` bytes to be consumed.

This is done by truncating the input provided to the inner scanner `then` to at most `width` bytes.  If the input is already shorter than `width` bytes, it is passed on without truncation.

See: [`max_width_a`](fn.max_width_a.html).
*/
pub fn max_width<Then>(width: usize, then: Then) -> MaxWidth<Then> {
    MaxWidth(width, then)
}
97
98/**
99Creates a runtime scanner that forces *at most* `width` bytes to be consumed by the static scanner `S`.
100
101See: [`max_width`](fn.max_width.html).
102*/
103pub fn max_width_a<S>(width: usize) -> MaxWidth<ScanA<S>> {
104    max_width(width, scan_a::<S>())
105}
106
/**
Runtime scanner that forces *at most* `width` bytes to be consumed.

Field `0` is the maximum width in bytes; field `1` is the inner scanner that is handed the truncated input.

See: [`max_width`](fn.max_width.html), [`max_width_a`](fn.max_width_a.html).
*/
pub struct MaxWidth<Then>(usize, Then);
113
114impl<'a, Then> ScanStr<'a> for MaxWidth<Then>
115where Then: ScanStr<'a> {
116    type Output = Then::Output;
117
118    fn scan<I: ScanInput<'a>>(&mut self, s: I) -> Result<(Self::Output, usize), ScanError> {
119        let s_str = s.as_str();
120        let len = ::std::cmp::min(s_str.len(), self.0);
121        let stop = StrCursor::new_at_left_of_byte_pos(s_str, len);
122        let sl = s.from_subslice(stop.slice_before());
123
124        self.1.scan(sl)
125    }
126
127    fn wants_leading_junk_stripped(&self) -> bool {
128        self.1.wants_leading_junk_stripped()
129    }
130}
131
#[cfg(test)]
#[test]
fn test_max_width() {
    use ::ScanError as SE;
    use ::ScanErrorKind as SEK;
    use ::scanner::Word;

    // Nothing to scan: the inner word scanner's own error comes through.
    assert_match!(max_width_a::<Word>(2).scan(""), Err(SE { kind: SEK::SyntaxNoMessage, .. }));

    // Input shorter than, or broken up before, the limit.
    assert_match!(max_width_a::<Word>(2).scan("a"), Ok(("a", 1)));
    assert_match!(max_width_a::<Word>(2).scan("a b"), Ok(("a", 1)));

    // Input at or beyond the limit is capped at two bytes.
    assert_match!(max_width_a::<Word>(2).scan("ab"), Ok(("ab", 2)));
    assert_match!(max_width_a::<Word>(2).scan("abc"), Ok(("ab", 2)));
}
146
/**
Creates a runtime scanner that forces *at least* `width` bytes to be consumed.

This is done by verifying the inner scanner `then` consumed at least `width` bytes.  Input shorter than `width` bytes is rejected with a syntax error before the inner scanner is run.

See: [`min_width_a`](fn.min_width_a.html).
*/
pub fn min_width<Then>(width: usize, then: Then) -> MinWidth<Then> {
    MinWidth(width, then)
}
157
158/**
159Creates a runtime scanner that forces *at least* `width` bytes to be consumed by the static scanner `S`.
160
161See: [`min_width`](fn.min_width.html).
162*/
163pub fn min_width_a<S>(width: usize) -> MinWidth<ScanA<S>> {
164    min_width(width, scan_a::<S>())
165}
166
/**
Runtime scanner that forces *at least* `width` bytes to be consumed.

Field `0` is the minimum width in bytes; field `1` is the inner scanner, which is handed the full, untruncated input.

See: [`min_width`](fn.min_width.html), [`min_width_a`](fn.min_width_a.html).
*/
pub struct MinWidth<Then>(usize, Then);
173
174impl<'a, Then> ScanStr<'a> for MinWidth<Then>
175where Then: ScanStr<'a> {
176    type Output = Then::Output;
177
178    fn scan<I: ScanInput<'a>>(&mut self, s: I) -> Result<(Self::Output, usize), ScanError> {
179        let s_str = s.as_str();
180        if s_str.len() < self.0 {
181            return Err(ScanError::syntax("expected more bytes to scan"));
182        }
183        match self.1.scan(s) {
184            Ok((_, n)) if n < self.0 => Err(ScanError::syntax("scanned value too short")),
185            other => other
186        }
187    }
188
189    fn wants_leading_junk_stripped(&self) -> bool {
190        self.1.wants_leading_junk_stripped()
191    }
192}
193
#[cfg(test)]
#[test]
fn test_min_width() {
    use ::ScanError as SE;
    use ::ScanErrorKind as SEK;
    use ::scanner::Word;

    // Too little input, or a word that ends before two bytes were consumed.
    assert_match!(min_width_a::<Word>(2).scan(""), Err(SE { kind: SEK::Syntax(_), .. }));
    assert_match!(min_width_a::<Word>(2).scan("a"), Err(SE { kind: SEK::Syntax(_), .. }));
    assert_match!(min_width_a::<Word>(2).scan("a b"), Err(SE { kind: SEK::Syntax(_), .. }));

    // Two or more bytes consumed: the inner result is passed through untouched.
    assert_match!(min_width_a::<Word>(2).scan("ab"), Ok(("ab", 2)));
    assert_match!(min_width_a::<Word>(2).scan("abc"), Ok(("abc", 3)));
}
208
/**
Creates a runtime scanner that extracts a slice of the input using a regular expression, then scans the result using `Then`.

**Note**: requires the `regex` feature.

If the regular expression defines a group named `scan`, then it will extract the contents of that group.  Failing that, it will use the first capturing group.  If there are no capturing groups, it will extract the entire match.

Irrespective of the amount of input provided by the regex scanner to the inner scanner, and of how much of it the inner scanner consumes, the regex scanner consumes everything up to the end of the complete regular expression match.  The expression is not implicitly anchored, so any input before the start of the match is consumed as well.

Note that this scanner *does not* respect the case sensitivity of the input.

# Panics

Panics if `s` is not a valid regular expression.

See: [`regex` crate](http://doc.rust-lang.org/regex/regex/index.html), [`re_a`](fn.re_a.html), [`re_str`](fn.re_str.html).
*/
#[cfg(feature="regex")]
pub fn re<Then>(s: &str, then: Then) -> ScanRegex<Then> {
    // The signature cannot report failure, so an invalid pattern is treated as
    // a programming error; the message makes the source of the panic clear.
    let regex = Regex::new(s).expect("invalid regular expression passed to `re`");
    ScanRegex(regex, then)
}
226
/**
Creates a runtime regex scanner that hands the matched input to the static scanner `S`.

**Note**: requires the `regex` feature.

This is shorthand for wrapping `scan_a::<S>()` with `re`.

See: [`re`](fn.re.html).
*/
#[cfg(feature="regex")]
pub fn re_a<S>(s: &str) -> ScanRegex<ScanA<S>> {
    let inner = scan_a::<S>();
    re(s, inner)
}
238
/**
Creates a runtime regex scanner whose output is the matched input itself, as a string slice.

**Note**: requires the `regex` feature.

See: [`re`](fn.re.html), [`re_a`](fn.re_a.html).
*/
#[cfg(feature="regex")]
pub fn re_str(s: &str) -> ScanRegex<ScanA<::scanner::Everything<&str>>> {
    // `Everything<&str>` is the static scanner used to turn the extracted
    // slice into the output.
    re(s, scan_a::<::scanner::Everything<&str>>())
}
250
/**
Runtime scanner that slices the input based on a regular expression.

**Note**: requires the `regex` feature.

Field `0` is the compiled regular expression; field `1` is the inner scanner that is handed the extracted slice.

See: [`re`](../fn.re.html), [`re_a`](../fn.re_a.html), [`re_str`](../fn.re_str.html).
*/
#[cfg(feature="regex")]
pub struct ScanRegex<Then>(Regex, Then);
260
#[cfg(feature="regex")]
impl<'a, Then> ScanStr<'a> for ScanRegex<Then>
where Then: ScanStr<'a> {
    type Output = Then::Output;

    /**
    Matches the regular expression against the input, then runs the inner scanner on the extracted slice.

    The slice handed to the inner scanner is, in order of preference: the group named `scan`, capture group 1, or the whole match.  On success, the number of bytes reported as consumed is the end offset of the whole match, regardless of what the inner scanner reported.

    Returns a syntax error if the expression does not match; errors from the inner scanner are passed through unchanged.
    */
    fn scan<I: ScanInput<'a>>(&mut self, s: I) -> Result<(Self::Output, usize), ScanError> {
        let text = s.as_str();

        let caps = match self.0.captures(text) {
            Some(caps) => caps,
            None => return Err(ScanError::syntax("no match for regular expression")),
        };

        // Byte span of the complete match (capture group 0).
        let (whole_start, whole_end) = match caps.pos(0) {
            Some(span) => span,
            None => return Err(ScanError::syntax("no match for regular expression")),
        };

        // Named group first, then the first capture group, then the whole match.
        let picked = caps.name("scan")
            .or_else(|| caps.pos(1).map(|(from, to)| &text[from..to]))
            .unwrap_or_else(|| &text[whole_start..whole_end]);

        self.1
            .scan(s.from_subslice(picked))
            .map(|(value, _)| (value, whole_end))
    }

    /// Defers to the inner scanner's preference.
    fn wants_leading_junk_stripped(&self) -> bool {
        let ScanRegex(_, ref inner) = *self;
        inner.wants_leading_junk_stripped()
    }
}
298
#[cfg(feature="regex")]
#[cfg(test)]
#[test]
fn test_re() {
    use ::ScanError as SE;
    use ::ScanErrorKind as SEK;

    // One lowercase letter immediately followed by one digit.
    const PAT: &'static str = "[a-z][0-9]";

    // No match anywhere in the input.
    assert_match!(re_str(PAT).scan(""), Err(SE { kind: SEK::Syntax(_), .. }));
    assert_match!(re_str(PAT).scan("a"), Err(SE { kind: SEK::Syntax(_), .. }));
    assert_match!(re_str(PAT).scan("a 0"), Err(SE { kind: SEK::Syntax(_), .. }));

    // Match at the start of the input.
    assert_match!(re_str(PAT).scan("a0"), Ok(("a0", 2)));
    assert_match!(re_str(PAT).scan("a0c"), Ok(("a0", 2)));

    // Match after a leading space: the consumed count runs to the end of the match.
    assert_match!(re_str(PAT).scan(" a0"), Ok(("a0", 3)));
}
314
/**
Returns a runtime scanner that delegates to a static scanner.

The static scanner is selected purely through the type parameter `S`; the returned value holds no data beyond a `PhantomData<S>` marker.
*/
pub fn scan_a<S>() -> ScanA<S> {
    ScanA(PhantomData)
}
321
/**
Runtime scanner that delegates to a static scanner.

The only field is a `PhantomData<S>` marker recording which static scanner `S` to delegate to.

See: [`scan_a`](../fn.scan_a.html).
*/
pub struct ScanA<S>(PhantomData<S>);
328
329impl<'a, S> ScanStr<'a> for ScanA<S>
330where S: ScanFromStr<'a> {
331    type Output = S::Output;
332
333    fn scan<I: ScanInput<'a>>(&mut self, s: I) -> Result<(Self::Output, usize), ScanError> {
334        <S as ScanFromStr<'a>>::scan_from(s)
335    }
336
337    fn wants_leading_junk_stripped(&self) -> bool {
338        <S as ScanFromStr<'a>>::wants_leading_junk_stripped()
339    }
340}
341
/**
Creates a runtime scanner that will extract a slice of the input up to, but *not* including, a specified string pattern, then scans that slice using `then`.

**Note**: requires the `nightly-pattern` feature and a nightly compiler.

Scanning fails with a syntax error if the pattern `pat` does not occur in the input.

Note that this scanner *does not* respect the case sensitivity of the input.

See: [`until_pat_a`](fn.until_pat_a.html), [`until_pat_str`](fn.until_pat_str.html).
*/
#[cfg(feature="nightly-pattern")]
pub fn until_pat<Then, P>(pat: P, then: Then) -> UntilPat<Then, P> {
    UntilPat(pat, then)
}
355
/**
Creates a runtime scanner that extracts the input up to, but *not* including, a specified string pattern, and hands that slice to the static scanner `S`.

**Note**: requires the `nightly-pattern` feature and a nightly compiler.

Note that this scanner *does not* respect the case sensitivity of the input.

This is shorthand for wrapping `scan_a::<S>()` with `until_pat`.

See: [`until_pat`](fn.until_pat.html).
*/
#[cfg(feature="nightly-pattern")]
pub fn until_pat_a<S, P>(pat: P) -> UntilPat<ScanA<S>, P> {
    let inner = scan_a::<S>();
    until_pat(pat, inner)
}
369
/**
Creates a runtime scanner that yields the input up to, but *not* including, a specified string pattern, as a string slice.

**Note**: requires the `nightly-pattern` feature and a nightly compiler.

Note that this scanner *does not* respect the case sensitivity of the input.

See: [`until_pat`](fn.until_pat.html), [`until_pat_a`](fn.until_pat_a.html).
*/
#[cfg(feature="nightly-pattern")]
pub fn until_pat_str<'a, P>(pat: P) -> UntilPat<ScanA<::scanner::Everything<'a, &'a str>>, P> {
    // `Everything<&str>` is the static scanner used to turn the extracted
    // slice into the output.
    until_pat(pat, scan_a::<::scanner::Everything<&str>>())
}
383
/**
Runtime scanner that slices the input based on a string pattern.

**Note**: requires the `nightly-pattern` feature and a nightly compiler.

Field `0` is the pattern to search for; field `1` is the inner scanner that is handed the input preceding the first occurrence of that pattern.

See: [`until_pat`](../fn.until_pat.html).
*/
#[cfg(feature="nightly-pattern")]
pub struct UntilPat<Then, P>(P, Then);
393
/**
# Why This Bound?

Ideally, `P: Pattern` would imply `&P: Pattern`, but it doesn't.  That leaves a choice between two alternatives:

- `for<'b> P: Copy + Pattern<'b>`
- `for<'b, 'c> &'b P: Pattern<'c>`

The first admits (as of 2016-03-05) every `Pattern` impl *except* the `F: FnMut(char) -> bool` one; the second admits only `&&str`.

This is a bit disappointing, since the biggest draw of `Pattern` is definitely callables (*e.g.* `until_pat_str(char::is_whitespace)`), but it currently can't be helped.

## Why Not `Clone`?

Requiring `Clone` instead is worrying: the `clone` would have to happen on every scan, and inside a loop that could be *a lot* of clones.  Restricting the scanner to patterns that are guaranteed to be cheap to copy seemed the better idea.
*/
#[cfg(feature="nightly-pattern")]
impl<'a, Then, P> ScanStr<'a> for UntilPat<Then, P>
where
    Then: ScanStr<'a>,
    for<'b> P: Copy + ::std::str::pattern::Pattern<'b>,
{
    type Output = Then::Output;

    /**
    Finds the first occurrence of the pattern and runs the inner scanner on the input that precedes it.

    Returns a syntax error if the pattern does not occur in the input; otherwise the inner scanner's result is returned as-is.
    */
    fn scan<I: ScanInput<'a>>(&mut self, s: I) -> Result<(Self::Output, usize), ScanError> {
        let text = s.as_str();

        match text.find(self.0) {
            None => Err(ScanError::syntax("no match for pattern")),
            Some(cut) => {
                // Everything before the pattern; the pattern itself is excluded.
                let before = s.from_subslice(&text[..cut]);
                self.1.scan(before)
            },
        }
    }

    /// Defers to the inner scanner's preference.
    fn wants_leading_junk_stripped(&self) -> bool {
        let UntilPat(_, ref inner) = *self;
        inner.wants_leading_junk_stripped()
    }
}
435
#[cfg(feature="nightly-pattern")]
#[cfg(test)]
#[test]
fn test_until() {
    use ::ScanError as SE;
    use ::ScanErrorKind as SEK;

    #[allow(non_snake_case)]
    fn S(s: &str) -> String { String::from(s) }

    // Runs the same six cases against one spelling of the pattern "x".  The
    // pattern expression is re-evaluated for every assertion, so each scanner
    // is built from a fresh pattern value.
    macro_rules! check_pattern {
        ($pat:expr) => {{
            // Pattern absent: syntax error.
            assert_match!(until_pat_str($pat).scan(""), Err(SE { kind: SEK::Syntax(_), .. }));
            assert_match!(until_pat_str($pat).scan("a"), Err(SE { kind: SEK::Syntax(_), .. }));
            assert_match!(until_pat_str($pat).scan("ab"), Err(SE { kind: SEK::Syntax(_), .. }));

            // Pattern present: everything before it is yielded and consumed.
            assert_match!(until_pat_str($pat).scan("x"), Ok(("", 0)));
            assert_match!(until_pat_str($pat).scan("ax"), Ok(("a", 1)));
            assert_match!(until_pat_str($pat).scan("abx"), Ok(("ab", 2)));
        }};
    }

    check_pattern!("x");
    check_pattern!(&"x");
    check_pattern!(&S("x"));
    check_pattern!('x');
    check_pattern!(&['x'][..]);
}
480}