scan_rules/scanner/
misc.rs

1/*
2Copyright ⓒ 2016 Daniel Keep.
3
4Licensed under the MIT license (see LICENSE or <http://opensource.org
5/licenses/MIT>) or the Apache License, Version 2.0 (see LICENSE of
6<http://www.apache.org/licenses/LICENSE-2.0>), at your option. All
7files in the project carrying such notice may not be copied, modified,
8or distributed except according to those terms.
9*/
10/*!
11Miscellaneous, abstract scanners.
12*/
13use std::marker::PhantomData;
14use strcursor::StrCursor;
15use ::ScanError;
16use ::input::ScanInput;
17use ::util::StrUtil;
18use super::{
19    ScanFromStr, ScanSelfFromStr,
20    ScanFromBinary, ScanFromOctal, ScanFromHex,
21};
22
/**
Scans the given `Output` type from its binary representation.

This is a marker type: its only field is a `PhantomData`, so it carries no data of its own.  It exists to be named as a scanner (*e.g.* `Binary<i32>`); the actual scanning is delegated to `Output`'s `ScanFromBinary` implementation.
*/
pub struct Binary<Output>(PhantomData<Output>);
27
28impl<'a, Output> ScanFromStr<'a> for Binary<Output>
29where Output: ScanFromBinary<'a> {
30    type Output = Output;
31    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
32        Output::scan_from_binary(s)
33    }
34}
35
#[cfg(test)]
#[test]
fn test_binary() {
    type Bin = Binary<i32>;

    // Only the leading digit is consumed; the scan stops at the space.
    assert_match!(Bin::scan_from("0 1 2 x"), Ok((0b0, 1)));
    // `2` is not a binary digit, so only `01` is consumed.
    assert_match!(Bin::scan_from("012x"), Ok((0b1, 2)));
    // A `0b` prefix is not understood: the scan stops at the `b`.
    assert_match!(Bin::scan_from("0b012x"), Ok((0b0, 1)));
    // A trailing `b` suffix is likewise left unconsumed.
    assert_match!(Bin::scan_from("110010101110000b"), Ok((0x6570, 15)));
}
44
/**
Scans all remaining input into a string.

In most cases, you should use the `.. name` tail capture term to perform this task.  This scanner is provided as a way to do this in contexts where tail capture is not valid (because it normally wouldn't make any sense).

The scan always succeeds and always consumes the whole input, even when the input is empty.  `Output` defaults to `&'a str`.
*/
pub struct Everything<'a, Output=&'a str>(PhantomData<(&'a (), Output)>);
51
#[cfg(str_into_output_extra_broken)]
impl<'a> ScanFromStr<'a> for Everything<'a, &'a str> {
    type Output = &'a str;

    /**
    Returns the entire input as the scanned value, reporting its full byte length as consumed.
    */
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let rest = s.as_str();
        let consumed = rest.len();
        Ok((rest.into(), consumed))
    }
}
60
#[cfg(str_into_output_extra_broken)]
impl<'a> ScanFromStr<'a> for Everything<'a, String> {
    type Output = String;

    /**
    Returns an owned copy of the entire input, reporting its full byte length as consumed.
    */
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let rest = s.as_str();
        let consumed = rest.len();
        Ok((rest.into(), consumed))
    }
}
69
70#[cfg(not(str_into_output_extra_broken))]
71impl<'a, Output> ScanFromStr<'a> for Everything<'a, Output>
72where &'a str: Into<Output> {
73    type Output = Output;
74    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
75        let s = s.as_str();
76        Ok((s.into(), s.len()))
77    }
78}
79
#[cfg(test)]
#[test]
fn test_everything() {
    // That's the scanner named `Everything`, not literally everything.

    // Empty input still succeeds, consuming nothing.
    assert_match!(Everything::<&str>::scan_from(""), Ok(("", 0)));
    // The consumed length is in bytes, not characters: `で` is three bytes of UTF-8.
    assert_match!(Everything::<&str>::scan_from("で"), Ok(("で", 3)));
    // 16 kana at three bytes each, plus two ideographic spaces (U+3000) at three bytes each, is
    // 54 bytes.  The separators are written as escapes so that they cannot be silently
    // normalised to ASCII spaces, which would make the string only 50 bytes long.
    assert_match!(
        Everything::<&str>::scan_from("うまいー\u{3000}うまいー\u{3000}ぼうぼうぼうぼう"),
        Ok(("うまいー\u{3000}うまいー\u{3000}ぼうぼうぼうぼう", 54))
    );
}
88
/**
Scans the given `Output` type from its hexadecimal representation.

This is a marker type: its only field is a `PhantomData`, so it carries no data of its own.  It exists to be named as a scanner (*e.g.* `Hex<i32>`); the actual scanning is delegated to `Output`'s `ScanFromHex` implementation.
*/
pub struct Hex<Output>(PhantomData<Output>);
93
94impl<'a, Output> ScanFromStr<'a> for Hex<Output>
95where Output: ScanFromHex<'a> {
96    type Output = Output;
97    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
98        Output::scan_from_hex(s)
99    }
100}
101
#[cfg(test)]
#[test]
fn test_hex() {
    type HexI32 = Hex<i32>;

    // Only the leading digit is consumed; the scan stops at the space.
    assert_match!(HexI32::scan_from("0 1 2 x"), Ok((0x0, 1)));
    // `x` is not a hex digit, so `012` is consumed.
    assert_match!(HexI32::scan_from("012x"), Ok((0x12, 3)));
    // A `0x` prefix is not understood: the scan stops at the `x`.
    assert_match!(HexI32::scan_from("0x012x"), Ok((0x0, 1)));
    // Mixed case is accepted; the non-ASCII `é` ends the scan.
    assert_match!(HexI32::scan_from("BadCafé"), Ok((0xbadcaf, 6)));
}
110
/**
Scans a sequence of horizontal (non-newline) space characters into a string.

This *will not* match an empty sequence; there must be at least one space character for the scan to succeed.

"Horizontal space" here means any character with the Unicode `White_Space` property *except* the line-breaking characters U+000A through U+000D, U+0085, U+2028 and U+2029.

The implementations of this scanner return `false` from `wants_leading_junk_stripped`.  `Output` defaults to `&'a str`.
*/
pub struct HorSpace<'a, Output=&'a str>(PhantomData<(&'a (), Output)>);
117
// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
#[cfg(str_into_output_extra_broken)]
impl<'a> ScanFromStr<'a> for HorSpace<'a, &'a str> {
    type Output = &'a str;

    /**
    Scans the leading run of horizontal space, failing with a message-less syntax error if the input does not start with any.
    */
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let text = s.as_str();
        let end = match match_hor_space(text) {
            Some(end) => end,
            // No message attached; see the FIXME on this impl.
            None => return Err(ScanError::syntax_no_message()),
        };

        // The consumed length is reported as the offset of the remainder within the input.
        let (spaces, rest) = text.split_at(end);
        Ok((spaces.into(), text.subslice_offset_stable(rest).unwrap()))
    }

    fn wants_leading_junk_stripped() -> bool { false }
}
138
// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
#[cfg(str_into_output_extra_broken)]
impl<'a> ScanFromStr<'a> for HorSpace<'a, String> {
    type Output = String;

    /**
    Scans the leading run of horizontal space into an owned string, failing with a message-less syntax error if the input does not start with any.
    */
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let text = s.as_str();
        let end = match match_hor_space(text) {
            Some(end) => end,
            // No message attached; see the FIXME on this impl.
            None => return Err(ScanError::syntax_no_message()),
        };

        // The consumed length is reported as the offset of the remainder within the input.
        let (spaces, rest) = text.split_at(end);
        Ok((spaces.into(), text.subslice_offset_stable(rest).unwrap()))
    }

    fn wants_leading_junk_stripped() -> bool { false }
}
159
160// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
161#[cfg(not(str_into_output_extra_broken))]
162impl<'a, Output> ScanFromStr<'a> for HorSpace<'a, Output>
163where &'a str: Into<Output> {
164    type Output = Output;
165
166    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
167        let s = s.as_str();
168        match match_hor_space(s) {
169            Some(b) => {
170                let word = &s[..b];
171                let tail = &s[b..];
172                Ok((word.into(), s.subslice_offset_stable(tail).unwrap()))
173            },
174            // None => Err(ScanError::syntax("expected a space")),
175            None => Err(ScanError::syntax_no_message()),
176        }
177    }
178
179    fn wants_leading_junk_stripped() -> bool { false }
180}
181
182fn match_hor_space(s: &str) -> Option<usize> {
183    use ::util::TableUtil;
184    use ::unicode::property::White_Space_table as WS;
185
186    s.char_indices()
187        .take_while(|&(_, c)| match c {
188            '\x0a'...'\x0d' | '\u{85}' | '\u{2028}' | '\u{2029}' => false,
189            c => WS.span_table_contains(&c)
190        })
191        .map(|(i, c)| i + c.len_utf8())
192        .last()
193}
194
#[cfg(test)]
#[test]
fn test_hor_space() {
    use ::ScanError as SE;
    use ::ScanErrorKind as SEK;

    type Hs = HorSpace<'static, &'static str>;

    // Empty input and non-space input are both rejected.
    assert_match!(Hs::scan_from(""), Err(SE { kind: SEK::SyntaxNoMessage, .. }));
    assert_match!(Hs::scan_from("a"), Err(SE { kind: SEK::SyntaxNoMessage, .. }));
    assert_match!(Hs::scan_from("0"), Err(SE { kind: SEK::SyntaxNoMessage, .. }));

    // Spaces and tabs are horizontal space.
    assert_match!(Hs::scan_from(" "), Ok((" ", 1)));
    assert_match!(Hs::scan_from("\t"), Ok(("\t", 1)));

    // Line breaks are not.
    assert_match!(Hs::scan_from("\r"), Err(SE { kind: SEK::SyntaxNoMessage, .. }));
    assert_match!(Hs::scan_from("\n"), Err(SE { kind: SEK::SyntaxNoMessage, .. }));
    assert_match!(Hs::scan_from("\r\n"), Err(SE { kind: SEK::SyntaxNoMessage, .. }));

    // The run stops at the first line break.
    assert_match!(Hs::scan_from("  \t \n \t\t "), Ok(("  \t ", 4)));
}
211
/**
Scans a single identifier into a string.

Specifically, this will match a single `XID_Start` character (or underscore) followed by zero or more `XID_Continue` characters.

The scan fails if the input does not begin with such a starting character; for example, an input beginning with a decimal digit is rejected.  `Output` defaults to `&'a str`.
*/
pub struct Ident<'a, Output=&'a str>(PhantomData<(&'a (), Output)>);
218
// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
#[cfg(str_into_output_extra_broken)]
impl<'a> ScanFromStr<'a> for Ident<'a, &'a str> {
    type Output = &'a str;

    /**
    Scans the identifier at the start of the input, failing with a message-less syntax error if there is none.
    */
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let text = s.as_str();
        let end = match match_ident(text) {
            Some(end) => end,
            // No message attached; see the FIXME on this impl.
            None => return Err(ScanError::syntax_no_message()),
        };

        // The consumed length is reported as the offset of the remainder within the input.
        let (ident, rest) = text.split_at(end);
        Ok((ident.into(), text.subslice_offset_stable(rest).unwrap()))
    }
}
238
// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
#[cfg(str_into_output_extra_broken)]
impl<'a> ScanFromStr<'a> for Ident<'a, String> {
    type Output = String;

    /**
    Scans the identifier at the start of the input into an owned string, failing with a message-less syntax error if there is none.
    */
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let text = s.as_str();
        let end = match match_ident(text) {
            Some(end) => end,
            // No message attached; see the FIXME on this impl.
            None => return Err(ScanError::syntax_no_message()),
        };

        // The consumed length is reported as the offset of the remainder within the input.
        let (ident, rest) = text.split_at(end);
        Ok((ident.into(), text.subslice_offset_stable(rest).unwrap()))
    }
}
258
259#[cfg(not(str_into_output_extra_broken))]
260// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
261impl<'a, Output> ScanFromStr<'a> for Ident<'a, Output>
262where &'a str: Into<Output> {
263    type Output = Output;
264    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
265        let s = s.as_str();
266        match match_ident(s) {
267            Some(b) => {
268                let word = &s[..b];
269                let tail = &s[b..];
270                Ok((word.into(), s.subslice_offset_stable(tail).unwrap()))
271            },
272            None => {
273                // Err(ScanError::syntax("expected identifier"))
274                Err(ScanError::syntax_no_message())
275            },
276        }
277    }
278}
279
280fn match_ident(s: &str) -> Option<usize> {
281    use ::util::TableUtil;
282    use ::unicode::derived_property::{XID_Continue_table, XID_Start_table};
283
284    let mut ics = s.char_indices();
285
286    let first_len = match ics.next() {
287        Some((_, '_')) => 1,
288        Some((_, c)) if XID_Start_table.span_table_contains(&c) => c.len_utf8(),
289        _ => return None,
290    };
291
292    let len = ics
293        .take_while(|&(_, c)| XID_Continue_table.span_table_contains(&c))
294        .map(|(i, c)| i + c.len_utf8())
295        .last()
296        .unwrap_or(first_len);
297
298    Some(len)
299}
300
#[cfg(test)]
#[test]
fn test_ident() {
    use ::ScanError as SE;
    use ::ScanErrorKind as SEK;

    type Id = Ident<'static, &'static str>;

    assert_eq!(match_ident("a"), Some(1));

    assert_match!(Id::scan_from(""), Err(SE { kind: SEK::SyntaxNoMessage, .. }));
    assert_match!(Id::scan_from("a"), Ok(("a", 1)));
    // A space ends the identifier; an underscore does not.
    assert_match!(Id::scan_from("two words "), Ok(("two", 3)));
    assert_match!(Id::scan_from("two_words "), Ok(("two_words", 9)));
    // Digits may continue an identifier but not start one.
    assert_match!(Id::scan_from("0123abc456 "), Err(SE { kind: SEK::SyntaxNoMessage, .. }));
    assert_match!(Id::scan_from("_0123abc456 "), Ok(("_0123abc456", 11)));
    // Punctuation ends the identifier.
    assert_match!(Id::scan_from("f(blah)"), Ok(("f", 1)));
}
317
/**
Explicitly infer the type of a scanner.

This is useful in cases where you want to only *partially* specify a scanner type, but the partial type cannot be inferred under normal circumstances.

For example, tuples allow their element types to scan to be abstract scanners; *e.g.* `(Word<String>, Hex<i32>)` will scan to `(String, i32)`.  However, this interferes with inferring the scanner type when you *partially* specify a tuple type.  If you attempt to store the result of scanning `(_, _)` into a `(String, i32)`, Rust cannot determine whether the *scanner* type should be `(String, Hex<i32>)`, or `(Word<String>, i32)`, or something else entirely.

This scanner, then, *requires* that the inner type scan to itself and *only* to itself.

It is a marker type holding only a `PhantomData`; scanning is delegated directly to `T`.
*/
pub struct Inferred<T>(PhantomData<T>);
328
impl<'a, T> ScanFromStr<'a> for Inferred<T>
where T: ScanSelfFromStr<'a> {
    // The `ScanSelfFromStr` bound is what pins the output type to `T` itself.
    type Output = T;
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        // Pure delegation: the input is handed to `T`'s own scanner and its result returned as-is.
        T::scan_from(s)
    }
}
336
/**
Scans everything up to the end of the current line, *or* the end of the input, whichever comes first.  The scanned result *does not* include the line terminator.

The recognised line terminators are `\n`, `\r\n`, and a lone `\r`.  Although the terminator is excluded from the scanned result, it *is* counted as consumed input.  The scan always succeeds; on empty input it yields an empty string.

Note that this is effectively equivalent to the `Everything` matcher when used with `readln!`.
*/
pub struct Line<'a, Output=&'a str>(PhantomData<(&'a (), Output)>);
343
#[cfg(str_into_output_extra_broken)]
impl<'a> ScanFromStr<'a> for Line<'a, &'a str> {
    type Output = &'a str;

    /**
    Returns the current line without its terminator, while counting the terminator as consumed.
    */
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let text = s.as_str();
        let (content_len, consumed) = match_line(text);
        let line = &text[..content_len];
        Ok((line.into(), consumed))
    }
}
353
#[cfg(str_into_output_extra_broken)]
impl<'a> ScanFromStr<'a> for Line<'a, String> {
    type Output = String;

    /**
    Returns an owned copy of the current line without its terminator, while counting the terminator as consumed.
    */
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let text = s.as_str();
        let (content_len, consumed) = match_line(text);
        let line = &text[..content_len];
        Ok((line.into(), consumed))
    }
}
363
364#[cfg(not(str_into_output_extra_broken))]
365impl<'a, Output> ScanFromStr<'a> for Line<'a, Output> where &'a str: Into<Output> {
366    type Output = Output;
367    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
368        let s = s.as_str();
369        let (a, b) = match_line(s);
370        Ok((s[..a].into(), b))
371    }
372}
373
/**
Locates the end of the first line in `s`.

Returns `(content_len, consumed)`: the byte length of the line's content, and the byte length of the content plus its terminator.  The terminator is `\n`, `\r\n`, or a lone `\r`; if there is none, both values are `s.len()`.
*/
fn match_line(s: &str) -> (usize, usize) {
    let bytes = s.as_bytes();

    match bytes.iter().position(|&b| b == b'\r' || b == b'\n') {
        // No terminator at all: the line is the whole input.
        None => (s.len(), s.len()),
        Some(at) if bytes[at] == b'\n' => (at, at + 1),
        Some(at) => {
            // Found a `\r`; it may be the first half of a `\r\n` pair.
            let terminator_len = if bytes.get(at + 1) == Some(&b'\n') { 2 } else { 1 };
            (at, at + terminator_len)
        },
    }
}
398
#[cfg(test)]
#[test]
fn test_line() {
    type Ln = Line<'static, &'static str>;

    // With no terminator, the line is the whole input.
    assert_match!(Ln::scan_from(""), Ok(("", 0)));
    assert_match!(Ln::scan_from("abc def"), Ok(("abc def", 7)));
    // The terminator is consumed but not part of the result.
    assert_match!(Ln::scan_from("abc\ndef"), Ok(("abc", 4)));
    assert_match!(Ln::scan_from("abc\r\ndef"), Ok(("abc", 5)));
    assert_match!(Ln::scan_from("abc\rdef"), Ok(("abc", 4)));
}
408
/**
Scans a single newline into a string.

This *will not* match an empty sequence, and will not match more than one newline.

The recognised newlines are U+000A (LF), U+000B, U+000C, U+000D (CR), U+0085, U+2028 and U+2029.  A CR immediately followed by an LF is scanned as a single, two-byte newline.

The implementations of this scanner return `false` from `wants_leading_junk_stripped`.  `Output` defaults to `&'a str`.
*/
pub struct Newline<'a, Output=&'a str>(PhantomData<(&'a (), Output)>);
415
// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
#[cfg(str_into_output_extra_broken)]
impl<'a> ScanFromStr<'a> for Newline<'a, &'a str> {
    type Output = &'a str;

    /**
    Scans the single newline at the start of the input, failing with a message-less syntax error if there is none.
    */
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let text = s.as_str();
        let end = match match_newline(text) {
            Some(end) => end,
            // No message attached; see the FIXME on this impl.
            None => return Err(ScanError::syntax_no_message()),
        };

        // The consumed length is reported as the offset of the remainder within the input.
        let (newline, rest) = text.split_at(end);
        Ok((newline.into(), text.subslice_offset_stable(rest).unwrap()))
    }

    fn wants_leading_junk_stripped() -> bool { false }
}
435
// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
#[cfg(str_into_output_extra_broken)]
impl<'a> ScanFromStr<'a> for Newline<'a, String> {
    type Output = String;

    /**
    Scans the single newline at the start of the input into an owned string, failing with a message-less syntax error if there is none.
    */
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let text = s.as_str();
        let end = match match_newline(text) {
            Some(end) => end,
            // No message attached; see the FIXME on this impl.
            None => return Err(ScanError::syntax_no_message()),
        };

        // The consumed length is reported as the offset of the remainder within the input.
        let (newline, rest) = text.split_at(end);
        Ok((newline.into(), text.subslice_offset_stable(rest).unwrap()))
    }

    fn wants_leading_junk_stripped() -> bool { false }
}
455
456// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
457#[cfg(not(str_into_output_extra_broken))]
458impl<'a, Output> ScanFromStr<'a> for Newline<'a, Output>
459where &'a str: Into<Output> {
460    type Output = Output;
461    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
462        let s = s.as_str();
463        match match_newline(s) {
464            Some(b) => {
465                let word = &s[..b];
466                let tail = &s[b..];
467                Ok((word.into(), s.subslice_offset_stable(tail).unwrap()))
468            },
469            // None => Err(ScanError::syntax("expected at least one non-space character")),
470            None => Err(ScanError::syntax_no_message())
471        }
472    }
473
474    fn wants_leading_junk_stripped() -> bool { false }
475}
476
/**
Measures the single newline at the start of `s`.

Returns the newline's byte length, or `None` if `s` does not begin with one.  The recognised newlines are LF, VT, FF, CR, NEL (U+0085), LINE SEPARATOR (U+2028) and PARAGRAPH SEPARATOR (U+2029); a CR directly followed by an LF counts as one two-byte newline.

See: <http://www.unicode.org/reports/tr18/#RL1.6>.
*/
fn match_newline(s: &str) -> Option<usize> {
    let mut chars = s.chars();

    match chars.next() {
        // CR is the only newline that may absorb a following character.
        Some('\x0d') => if chars.next() == Some('\x0a') { Some(2) } else { Some(1) },
        Some(ch) => match ch {
            '\x0a' | '\x0b' | '\x0c' | '\u{85}' | '\u{2028}' | '\u{2029}' => Some(ch.len_utf8()),
            _ => None,
        },
        None => None,
    }
}
499
#[cfg(test)]
#[test]
fn test_newline() {
    use ::ScanError as SE;
    use ::ScanErrorKind as SEK;

    type Nl = Newline<'static, &'static str>;

    assert_match!(Nl::scan_from(""), Err(SE { kind: SEK::SyntaxNoMessage, .. }));
    assert_match!(Nl::scan_from("x"), Err(SE { kind: SEK::SyntaxNoMessage, .. }));
    assert_match!(Nl::scan_from("\rx"), Ok(("\r", 1)));
    assert_match!(Nl::scan_from("\nx"), Ok(("\n", 1)));
    // CR LF is one newline...
    assert_match!(Nl::scan_from("\r\nx"), Ok(("\r\n", 2)));
    // ...but LF CR is two, of which only the first is scanned.
    assert_match!(Nl::scan_from("\n\rx"), Ok(("\n", 1)));
}
513
/**
Scans a sequence of non-space characters into a string.

This *will not* match an empty sequence; there must be at least one non-space character for the scan to succeed.

A "space" here is any character with the Unicode `White_Space` property, which includes line breaks as well as non-ASCII spaces such as U+00A0 and U+3000.  `Output` defaults to `&'a str`.
*/
pub struct NonSpace<'a, Output=&'a str>(PhantomData<(&'a (), Output)>);
520
// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
#[cfg(str_into_output_extra_broken)]
impl<'a> ScanFromStr<'a> for NonSpace<'a, &'a str> {
    type Output = &'a str;

    /**
    Scans the leading run of non-space characters, failing with a message-less syntax error if the input does not start with any.
    */
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let text = s.as_str();
        let end = match match_non_space(text) {
            Some(end) => end,
            // No message attached; see the FIXME on this impl.
            None => return Err(ScanError::syntax_no_message()),
        };

        // The consumed length is reported as the offset of the remainder within the input.
        let (word, rest) = text.split_at(end);
        Ok((word.into(), text.subslice_offset_stable(rest).unwrap()))
    }
}
538
// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
#[cfg(str_into_output_extra_broken)]
impl<'a> ScanFromStr<'a> for NonSpace<'a, String> {
    type Output = String;

    /**
    Scans the leading run of non-space characters into an owned string, failing with a message-less syntax error if the input does not start with any.
    */
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let text = s.as_str();
        let end = match match_non_space(text) {
            Some(end) => end,
            // No message attached; see the FIXME on this impl.
            None => return Err(ScanError::syntax_no_message()),
        };

        // The consumed length is reported as the offset of the remainder within the input.
        let (word, rest) = text.split_at(end);
        Ok((word.into(), text.subslice_offset_stable(rest).unwrap()))
    }
}
556
557// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
558#[cfg(not(str_into_output_extra_broken))]
559impl<'a, Output> ScanFromStr<'a> for NonSpace<'a, Output>
560where &'a str: Into<Output> {
561    type Output = Output;
562    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
563        let s = s.as_str();
564        match match_non_space(s) {
565            Some(b) => {
566                let word = &s[..b];
567                let tail = &s[b..];
568                Ok((word.into(), s.subslice_offset_stable(tail).unwrap()))
569            },
570            // None => Err(ScanError::syntax("expected at least one non-space character")),
571            None => Err(ScanError::syntax_no_message())
572        }
573    }
574}
575
576fn match_non_space(s: &str) -> Option<usize> {
577    use ::util::TableUtil;
578    use ::unicode::property::White_Space_table as WS;
579
580    s.char_indices()
581        .take_while(|&(_, c)| !WS.span_table_contains(&c))
582        .map(|(i, c)| i + c.len_utf8())
583        .last()
584}
585
#[cfg(test)]
#[test]
fn test_non_space() {
    use ::ScanError as SE;
    use ::ScanErrorKind as SEK;

    type Ns = NonSpace<'static, &'static str>;

    // Empty input, and input starting with a space, are rejected.
    assert_match!(Ns::scan_from(""), Err(SE { kind: SEK::SyntaxNoMessage, .. }));
    assert_match!(Ns::scan_from(" abc "), Err(SE { kind: SEK::SyntaxNoMessage, .. }));

    // ASCII white space ends the run.
    assert_match!(Ns::scan_from("abc "), Ok(("abc", 3)));
    assert_match!(Ns::scan_from("abc\t"), Ok(("abc", 3)));
    assert_match!(Ns::scan_from("abc\r"), Ok(("abc", 3)));
    assert_match!(Ns::scan_from("abc\n"), Ok(("abc", 3)));

    // So does non-ASCII white space...
    assert_match!(Ns::scan_from("abc\u{a0}"), Ok(("abc", 3)));
    assert_match!(Ns::scan_from("abc\u{2003}"), Ok(("abc", 3)));
    // ...but U+200B is not treated as white space, so it is part of the run.
    assert_match!(Ns::scan_from("abc\u{200B}"), Ok(("abc\u{200b}", 6)));
    assert_match!(Ns::scan_from("abc\u{3000}"), Ok(("abc", 3)));
}
603
/**
Scans a single number into a string.

Specifically, this will match a continuous run of decimal characters (*i.e.* /`\d+`/), where a "decimal character" is any character in the Unicode `Nd` general category.

Note that this *includes* non-ASCII decimal characters, meaning it will scan numbers such as "42", "1701", and "𐒩0꘠᧑".

The scan fails if the input does not begin with a decimal character.  The matched text is returned as-is; it is not converted to a numeric value.  `Output` defaults to `&'a str`.
*/
pub struct Number<'a, Output=&'a str>(PhantomData<(&'a (), Output)>);
612
// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
#[cfg(str_into_output_extra_broken)]
impl<'a> ScanFromStr<'a> for Number<'a, &'a str> {
    type Output = &'a str;

    /**
    Scans the leading run of decimal characters, failing with a message-less syntax error if the input does not start with any.
    */
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let text = s.as_str();
        let end = match match_number(text) {
            Some(end) => end,
            // No message attached; see the FIXME on this impl.
            None => return Err(ScanError::syntax_no_message()),
        };

        // The consumed length is reported as the offset of the remainder within the input.
        let (digits, rest) = text.split_at(end);
        Ok((digits.into(), text.subslice_offset_stable(rest).unwrap()))
    }
}
630
// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
#[cfg(str_into_output_extra_broken)]
impl<'a> ScanFromStr<'a> for Number<'a, String> {
    type Output = String;

    /**
    Scans the leading run of decimal characters into an owned string, failing with a message-less syntax error if the input does not start with any.
    */
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let text = s.as_str();
        let end = match match_number(text) {
            Some(end) => end,
            // No message attached; see the FIXME on this impl.
            None => return Err(ScanError::syntax_no_message()),
        };

        // The consumed length is reported as the offset of the remainder within the input.
        let (digits, rest) = text.split_at(end);
        Ok((digits.into(), text.subslice_offset_stable(rest).unwrap()))
    }
}
648
649// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
650#[cfg(not(str_into_output_extra_broken))]
651impl<'a, Output> ScanFromStr<'a> for Number<'a, Output>
652where &'a str: Into<Output> {
653    type Output = Output;
654    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
655        let s = s.as_str();
656        match match_number(s) {
657            Some(b) => {
658                let word = &s[..b];
659                let tail = &s[b..];
660                Ok((word.into(), s.subslice_offset_stable(tail).unwrap()))
661            },
662            // None => Err(ScanError::syntax("expected a number")),
663            None => Err(ScanError::syntax_no_message()),
664        }
665    }
666}
667
668fn match_number(s: &str) -> Option<usize> {
669    use ::util::TableUtil;
670    use ::unicode::general_category::Nd_table as Nd;
671
672    s.char_indices()
673        .take_while(|&(_, c)| Nd.span_table_contains(&c))
674        .map(|(i, c)| i + c.len_utf8())
675        .last()
676}
677
#[cfg(test)]
#[test]
fn test_number() {
    use ::ScanError as SE;
    use ::ScanErrorKind as SEK;

    // Empty input, and input not starting with a digit, are rejected.
    assert_match!(Number::<&str>::scan_from(""), Err(SE { kind: SEK::SyntaxNoMessage, .. }));
    assert_match!(Number::<&str>::scan_from("a"), Err(SE { kind: SEK::SyntaxNoMessage, .. }));
    assert_match!(Number::<&str>::scan_from("x0"), Err(SE { kind: SEK::SyntaxNoMessage, .. }));

    // The run stops at the first non-digit.
    assert_match!(Number::<&str>::scan_from("0"), Ok(("0", 1)));
    assert_match!(Number::<&str>::scan_from("0x"), Ok(("0", 1)));
    assert_match!(Number::<&str>::scan_from("123 456 xyz"), Ok(("123", 3)));

    // Non-ASCII decimal digits are accepted, and lengths are counted in bytes.  The fullwidth
    // digits are written as escapes so that they cannot be silently normalised to their ASCII
    // equivalents, which would invalidate the expected byte counts.
    //
    // "123" (3 bytes) + fullwidth "456" (9 bytes) + "789" (3 bytes) = 15 bytes.
    assert_match!(
        Number::<&str>::scan_from("123\u{ff14}\u{ff15}\u{ff16}789 "),
        Ok(("123\u{ff14}\u{ff15}\u{ff16}789", 15))
    );
    // U+104A9 (4 bytes) + fullwidth zero (3) + U+A620 (3) + U+19D1 (3) = 13 bytes.
    assert_match!(
        Number::<&str>::scan_from("𐒩\u{ff10}꘠᧑ "),
        Ok(("𐒩\u{ff10}꘠᧑", 13))
    );
}
694
/**
Scans the given `Output` type from its octal representation.

This is a marker type: its only field is a `PhantomData`, so it carries no data of its own.  It exists to be named as a scanner (*e.g.* `Octal<i32>`); the actual scanning is delegated to `Output`'s `ScanFromOctal` implementation.
*/
pub struct Octal<Output>(PhantomData<Output>);
699
700impl<'a, Output> ScanFromStr<'a> for Octal<Output>
701where Output: ScanFromOctal<'a> {
702    type Output = Output;
703    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
704        Output::scan_from_octal(s)
705    }
706}
707
#[cfg(test)]
#[test]
fn test_octal() {
    type Oct = Octal<i32>;

    // Only the leading digit is consumed; the scan stops at the space.
    assert_match!(Oct::scan_from("0 1 2 x"), Ok((0o0, 1)));
    // `x` is not an octal digit, so `012` is consumed.
    assert_match!(Oct::scan_from("012x"), Ok((0o12, 3)));
    // A `0o` prefix is not understood: the scan stops at the `o`.
    assert_match!(Oct::scan_from("0o012x"), Ok((0o0, 1)));
    // `8` is not an octal digit, so only `755` is consumed.
    assert_match!(Oct::scan_from("7558"), Ok((0o755, 3)));
}
716
/**
An abstract scanner that scans a `(K, V)` value using the syntax `K: V`.

This scanner is designed to take advantage of three things:

1. Maps (*i.e.* associative containers) typically print themselves with the syntax `{key_0: value_0, key_1: value_1, ...}`.

2. Maps typically implement `Extend<(K, V)>`; that is, you can add new items by extending the map with a `(K, V)` tuple.

3. Repeating bindings can be scanned into any container that implements `Default` and `Extend`.

As such, this scanner allows one to parse a `Map` type like so:

```ignore
scan!(input; "{", [let kvs: KeyValuePair<K, V>],*: Map<_, _>, "}" => kvs)
```

Both `K` and `V` must scan to themselves (*i.e.* implement `ScanSelfFromStr`).
*/
pub struct KeyValuePair<K, V>(PhantomData<(K, V)>);
735
736impl<'a, K, V> ScanFromStr<'a> for KeyValuePair<K, V>
737where K: ScanSelfFromStr<'a>, V: ScanSelfFromStr<'a> {
738    type Output = (K, V);
739    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
740        let s = s.as_str();
741        scan!(s;
742            (let k: K, ":", let v: V, ..tail) => ((k, v), s.subslice_offset_stable(tail).unwrap())
743        )
744    }
745}
746
/**
Scans a quoted string.

Specifically, it scans the quoting format used by the `Debug` formatter for strings.

The scanned string has all escape sequences expanded to their values, and the surrounding quotes removed.

This is an uninhabited enum: it has no variants and so can never be instantiated.  It exists purely to be named as a scanner; the scanned value is always a `String`.
*/
pub enum QuotedString {}
755
impl<'a> ScanFromStr<'a> for QuotedString {
    type Output = String;

    // Scans a double-quoted string from the start of the input.
    //
    // On success, returns the unescaped contents (without the surrounding quotes) together with
    // the cursor's byte position just past the closing quote.  Fails with a syntax error if the
    // input is empty, does not start with `"`, or ends before a closing `"` is found; a malformed
    // escape sequence is reported as an "other" error.
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let s = s.as_str();
        // Shorthand for building a syntax error from a message.
        let syn = |s| ScanError::syntax(s);

        // The very first code point must be the opening quote.
        let cur = StrCursor::new_at_start(s);
        let (cp, cur) = try!(cur.next_cp().ok_or(syn("expected quoted string")));
        match cp {
            '"' => (),
            _ => return Err(syn("expected `\"` for quoted string"))
        }

        // From here on, `s` is the output buffer and shadows the input slice; the input is only
        // reachable through the cursor.
        let mut s = String::new();
        let mut cur = cur;
        loop {
            match cur.next_cp() {
                // Ran out of input before seeing the closing quote.
                None => return Err(syn("unterminated quoted string")),
                Some(('\\', after)) => {
                    // Decode the escape sequence that follows the backslash.
                    // NOTE(review): presumably `split_escape_default` returns the decoded
                    // character and the remaining input after the escape -- confirm against
                    // `StrUtil`.
                    match after.slice_after().split_escape_default() {
                        // The error is offset by the position just past the backslash.
                        Err(err) => return Err(ScanError::other(err).add_offset(after.byte_pos())),
                        Ok((cp, tail)) => {
                            // TODO: replace this
                            // NOTE(review): presumably this repositions the cursor at the start
                            // of `tail`, and is sound only because `tail` is a sub-slice of the
                            // string the cursor was created over -- confirm against `strcursor`.
                            unsafe { cur.unsafe_set_at(tail); }
                            s.push(cp);
                        },
                    }
                },
                Some(('"', after)) => {
                    // Closing quote: step past it and stop.
                    cur = after;
                    break;
                },
                Some((cp, after)) => {
                    // Any other code point is copied through verbatim.
                    cur = after;
                    s.push(cp);
                },
            }
        }

        Ok((s, cur.byte_pos()))
    }
}
798
#[cfg(test)]
#[test]
fn test_quoted_string() {
    use ::ScanError as SE;
    use ::ScanErrorKind as SEK;

    // Empty input, unquoted input, and single-quoted input are all syntax errors.
    assert_match!(QuotedString::scan_from(""), Err(SE { kind: SEK::Syntax(_), .. }));
    assert_match!(QuotedString::scan_from("dummy xyz"), Err(SE { kind: SEK::Syntax(_), .. }));
    assert_match!(QuotedString::scan_from("'dummy' xyz"), Err(SE { kind: SEK::Syntax(_), .. }));

    // The consumed length covers both quotes; the result excludes them.
    assert_match!(QuotedString::scan_from("\"dummy\" xyz"),
        Ok((ref text, 7)) if text == "dummy");

    // Escapes are expanded in the result but counted at their source length.
    assert_match!(QuotedString::scan_from("\"ab\\\"cd\" xyz"),
        Ok((ref text, 8)) if text == "ab\"cd");
    assert_match!(QuotedString::scan_from("\"ab\\x41cd\" xyz"),
        Ok((ref text, 10)) if text == "abAcd");
    assert_match!(QuotedString::scan_from("\"a\\'b\\u{5B57}c\\0d\" xyz"),
        Ok((ref text, 18)) if text == "a'b字c\0d");
}
818
/**
Scans a sequence of space characters into a string.

This *will not* match an empty sequence; there must be at least one space character for the scan to succeed.

`Output` defaults to `&'a str`.
*/
pub struct Space<'a, Output=&'a str>(PhantomData<(&'a (), Output)>);
825
// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
//
// Borrowed-output variant, used when the generic `Into<Output>` impl cannot be.
#[cfg(str_into_output_extra_broken)]
impl<'a> ScanFromStr<'a> for Space<'a, &'a str> {
    type Output = &'a str;

    // Yields the leading run of whitespace and the number of bytes it occupies.
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let text = s.as_str();
        let end = match match_space(text) {
            Some(end) => end,
            // Intended message: "expected a space"; left out because of the FIXME above.
            None => return Err(ScanError::syntax_no_message()),
        };

        let (spaces, rest) = text.split_at(end);
        // The consumed length is reported as where the unmatched tail begins within `text`.
        Ok((spaces.into(), text.subslice_offset_stable(rest).unwrap()))
    }

    // Presumably opts out of leading-whitespace stripping so that the whitespace this
    // scanner exists to capture is still present when `scan_from` runs -- confirm
    // against the `ScanFromStr` trait documentation.
    fn wants_leading_junk_stripped() -> bool {
        false
    }
}
846
// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
//
// Owned-output variant, used when the generic `Into<Output>` impl cannot be.
#[cfg(str_into_output_extra_broken)]
impl<'a> ScanFromStr<'a> for Space<'a, String> {
    type Output = String;

    // Yields an owned copy of the leading run of whitespace and the number of bytes it occupies.
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let text = s.as_str();
        let end = match match_space(text) {
            Some(end) => end,
            // Intended message: "expected a space"; left out because of the FIXME above.
            None => return Err(ScanError::syntax_no_message()),
        };

        let (spaces, rest) = text.split_at(end);
        // The consumed length is reported as where the unmatched tail begins within `text`.
        Ok((spaces.into(), text.subslice_offset_stable(rest).unwrap()))
    }

    // Presumably opts out of leading-whitespace stripping so that the whitespace this
    // scanner exists to capture is still present when `scan_from` runs -- confirm
    // against the `ScanFromStr` trait documentation.
    fn wants_leading_junk_stripped() -> bool {
        false
    }
}
867
868// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
869#[cfg(not(str_into_output_extra_broken))]
870impl<'a, Output> ScanFromStr<'a> for Space<'a, Output>
871where &'a str: Into<Output> {
872    type Output = Output;
873
874    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
875        let s = s.as_str();
876        match match_space(s) {
877            Some(b) => {
878                let word = &s[..b];
879                let tail = &s[b..];
880                Ok((word.into(), s.subslice_offset_stable(tail).unwrap()))
881            },
882            // None => Err(ScanError::syntax("expected a space")),
883            None => Err(ScanError::syntax_no_message()),
884        }
885    }
886
887    fn wants_leading_junk_stripped() -> bool { false }
888}
889
890fn match_space(s: &str) -> Option<usize> {
891    use ::util::TableUtil;
892    use ::unicode::property::White_Space_table as WS;
893
894    s.char_indices()
895        .take_while(|&(_, c)| WS.span_table_contains(&c))
896        .map(|(i, c)| i + c.len_utf8())
897        .last()
898}
899
#[cfg(test)]
#[test]
fn test_space() {
    use ::ScanError as Error;
    use ::ScanErrorKind as Kind;

    // Inputs that do not start with whitespace fail with a message-less syntax error.
    assert_match!(Space::<&str>::scan_from(""), Err(Error { kind: Kind::SyntaxNoMessage, .. }));
    assert_match!(Space::<&str>::scan_from("a"), Err(Error { kind: Kind::SyntaxNoMessage, .. }));
    assert_match!(Space::<&str>::scan_from("0"), Err(Error { kind: Kind::SyntaxNoMessage, .. }));

    // Each individual whitespace character is accepted on its own.
    assert_match!(Space::<&str>::scan_from(" "), Ok((" ", 1)));
    assert_match!(Space::<&str>::scan_from("\t"), Ok(("\t", 1)));
    assert_match!(Space::<&str>::scan_from("\r"), Ok(("\r", 1)));
    assert_match!(Space::<&str>::scan_from("\n"), Ok(("\n", 1)));

    // Runs of mixed whitespace are taken whole...
    assert_match!(Space::<&str>::scan_from("\r\n"), Ok(("\r\n", 2)));
    assert_match!(Space::<&str>::scan_from("  \t \n \t\t "), Ok(("  \t \n \t\t ", 9)));

    // ...and stop at the first non-whitespace character.
    assert_match!(Space::<&str>::scan_from("  \t \nx \t\t "), Ok(("  \t \n", 5)));
}
917
/**
Scans a single word into a string.

Specifically, this will match a continuous run of alphabetic, digit, punctuation, mark, and joining characters (*i.e.* /`\w+`/).

The set of word characters is whatever the `PERLW` table (`::unicode::regex::PERLW`) consulted by `match_word` contains.  Not every punctuation character qualifies: the tests for this scanner expect the `,` in `"kumquat,bingo"` to end the word.

The scan fails if the input does not begin with a word character.  `Output` is the type the matched text is converted into (via `Into`); it defaults to `&'a str`.
*/
pub struct Word<'a, Output=&'a str>(PhantomData<(&'a (), Output)>);
924
// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
//
// Borrowed-output variant, used when the generic `Into<Output>` impl cannot be.
#[cfg(str_into_output_extra_broken)]
impl<'a> ScanFromStr<'a> for Word<'a, &'a str> {
    type Output = &'a str;

    // Yields the leading run of word characters and the number of bytes it occupies.
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let text = s.as_str();
        if let Some(end) = match_word(text) {
            let (word, rest) = text.split_at(end);
            // The consumed length is reported as where the unmatched tail begins within `text`.
            Ok((word.into(), text.subslice_offset_stable(rest).unwrap()))
        } else {
            // Intended message: "expected a word"; left out because of the FIXME above.
            Err(ScanError::syntax_no_message())
        }
    }
}
942
// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
//
// Owned-output variant, used when the generic `Into<Output>` impl cannot be.
#[cfg(str_into_output_extra_broken)]
impl<'a> ScanFromStr<'a> for Word<'a, String> {
    type Output = String;

    // Yields an owned copy of the leading run of word characters and the number of
    // bytes it occupies.
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let text = s.as_str();
        if let Some(end) = match_word(text) {
            let (word, rest) = text.split_at(end);
            // The consumed length is reported as where the unmatched tail begins within `text`.
            Ok((word.into(), text.subslice_offset_stable(rest).unwrap()))
        } else {
            // Intended message: "expected a word"; left out because of the FIXME above.
            Err(ScanError::syntax_no_message())
        }
    }
}
960
961// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
962#[cfg(not(str_into_output_extra_broken))]
963impl<'a, Output> ScanFromStr<'a> for Word<'a, Output>
964where &'a str: Into<Output> {
965    type Output = Output;
966    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
967        let s = s.as_str();
968        match match_word(s) {
969            Some(b) => {
970                let word = &s[..b];
971                let tail = &s[b..];
972                Ok((word.into(), s.subslice_offset_stable(tail).unwrap()))
973            },
974            // None => Err(ScanError::syntax("expected a word")),
975            None => Err(ScanError::syntax_no_message()),
976        }
977    }
978}
979
980fn match_word(s: &str) -> Option<usize> {
981    use ::util::TableUtil;
982    use ::unicode::regex::PERLW as W;
983
984    s.char_indices()
985        .take_while(|&(_, c)| W.span_table_contains(&c))
986        .map(|(i, c)| i + c.len_utf8())
987        .last()
988}
989
#[cfg(test)]
#[test]
fn test_word() {
    use ::ScanError as SE;
    use ::ScanErrorKind as SEK;

    // Empty input contains no word at all.
    assert_match!(Word::<&str>::scan_from(""), Err(SE { kind: SEK::SyntaxNoMessage, .. }));

    // Letters and digits may appear in any order within a word.
    assert_match!(Word::<&str>::scan_from("a"), Ok(("a", 1)));
    assert_match!(Word::<&str>::scan_from("0"), Ok(("0", 1)));
    assert_match!(Word::<&str>::scan_from("0x"), Ok(("0x", 2)));
    assert_match!(Word::<&str>::scan_from("x0"), Ok(("x0", 2)));

    // A word stops at the first space.  (This assertion previously appeared twice, verbatim.)
    assert_match!(Word::<&str>::scan_from("123 456 xyz"), Ok(("123", 3)));

    // The second tuple field is a *byte* offset, so it must equal the UTF-8 length of the
    // matched text: nine ASCII digits occupy 9 bytes.
    //
    // NOTE(review): the previous expectation of 15 would be right if three of these digits
    // were originally 3-byte fullwidth forms (U+FF10..U+FF19) that have since been normalised
    // to ASCII.  If that is the case, restore them with `\u{...}` escapes rather than literal
    // characters so the byte count stays verifiable.
    assert_match!(Word::<&str>::scan_from("123456789 "), Ok(("123456789", 9)));

    // Mixed-script digits: 𐒩 is 4 bytes, `0` is 1, ꘠ and ᧑ are 3 each => 11 bytes.
    // (The same caveat applies: 13 would be right if the zero were fullwidth U+FF10.)
    assert_match!(Word::<&str>::scan_from("𐒩0꘠᧑ "), Ok(("𐒩0꘠᧑", 11)));

    // Punctuation such as `,` terminates the word.
    assert_match!(Word::<&str>::scan_from("kumquat,bingo"), Ok(("kumquat", 7)));

    // 5 ASCII bytes + 2 * 3 bytes of kanji + 5 * 2 bytes of Arabic => 21 bytes.
    assert_match!(Word::<&str>::scan_from("mixed言葉كتابة "), Ok(("mixed言葉كتابة", 21)));
}
1008
/**
Scans a single word-ish thing into a string.

Specifically, this will match a word (a continuous run of alphabetic, digit, punctuation, mark, and joining characters), a number (a continuous run of digits), or a single other non-whitespace character (*i.e.* /`\w+|\d+|\S`/).

NOTE(review): the fallback in `match_wordish` takes the first character of its input without testing it for whitespace, so the `\S` part of the description presumably relies on leading whitespace having been stripped before the scanner runs -- confirm against the `ScanFromStr` contract.

`Output` is the type the matched text is converted into (via `Into`); it defaults to `&'a str`.
*/
pub struct Wordish<'a, Output=&'a str>(PhantomData<(&'a (), Output)>);
1015
// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
//
// Borrowed-output variant, used when the generic `Into<Output>` impl cannot be.
#[cfg(str_into_output_extra_broken)]
impl<'a> ScanFromStr<'a> for Wordish<'a, &'a str> {
    type Output = &'a str;

    // Yields the leading word-ish token and the number of bytes it occupies.
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let text = s.as_str();
        // TODO: This should be modified to grab an entire *grapheme cluster* in the event it can't find a word or number.
        match match_wordish(text) {
            // Intended message: "expected a word, number or some other character";
            // left out because of the FIXME above.
            None => Err(ScanError::syntax_no_message()),
            Some(end) => {
                let (token, rest) = text.split_at(end);
                // The consumed length is reported as where the unmatched tail begins within `text`.
                Ok((token.into(), text.subslice_offset_stable(rest).unwrap()))
            },
        }
    }
}
1034
// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
//
// Owned-output variant, used when the generic `Into<Output>` impl cannot be.
#[cfg(str_into_output_extra_broken)]
impl<'a> ScanFromStr<'a> for Wordish<'a, String> {
    type Output = String;

    // Yields an owned copy of the leading word-ish token and the number of bytes it occupies.
    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
        let text = s.as_str();
        // TODO: This should be modified to grab an entire *grapheme cluster* in the event it can't find a word or number.
        match match_wordish(text) {
            // Intended message: "expected a word, number or some other character";
            // left out because of the FIXME above.
            None => Err(ScanError::syntax_no_message()),
            Some(end) => {
                let (token, rest) = text.split_at(end);
                // The consumed length is reported as where the unmatched tail begins within `text`.
                Ok((token.into(), text.subslice_offset_stable(rest).unwrap()))
            },
        }
    }
}
1053
1054// FIXME: Error message omitted due to https://github.com/rust-lang/rust/issues/26448.
1055#[cfg(not(str_into_output_extra_broken))]
1056impl<'a, Output> ScanFromStr<'a> for Wordish<'a, Output>
1057where &'a str: Into<Output> {
1058    type Output = Output;
1059    fn scan_from<I: ScanInput<'a>>(s: I) -> Result<(Self::Output, usize), ScanError> {
1060        let s = s.as_str();
1061        // TODO: This should be modified to grab an entire *grapheme cluster* in the event it can't find a word or number.
1062        match match_wordish(s) {
1063            Some(b) => {
1064                let word = &s[..b];
1065                let tail = &s[b..];
1066                Ok((word.into(), s.subslice_offset_stable(tail).unwrap()))
1067            },
1068            // None => Err(ScanError::syntax("expected a word, number or some other character")),
1069            None => Err(ScanError::syntax_no_message()),
1070        }
1071    }
1072}
1073
1074fn match_wordish(s: &str) -> Option<usize> {
1075    use ::util::TableUtil;
1076    use ::unicode::regex::PERLW;
1077
1078    let word_len = s.char_indices()
1079        .take_while(|&(_, c)| PERLW.span_table_contains(&c))
1080        .map(|(i, c)| i + c.len_utf8())
1081        .last();
1082
1083    match word_len {
1084        Some(n) => Some(n),
1085        None => s.chars().next().map(|c| c.len_utf8()),
1086    }
1087}