domain_core/master/
scan.rs

1//! Scanning master file tokens.
2
3use std::{fmt, io};
4use std::net::AddrParseError;
5use bytes::{BufMut, Bytes, BytesMut};
6use failure::Fail;
7use ::bits::name;
8use ::bits::name::Dname;
9use ::utils::{base32, base64};
10
11
12//------------ CharSource ----------------------------------------------------
13
/// A supplier of the characters that make up a master file.
///
/// The trait resembles an iterator over `char`s, except that the optional
/// item is wrapped in a `Result` so that I/O failures of the underlying
/// source can be handed on with `?`.
pub trait CharSource {
    /// Fetches the next character from the source.
    ///
    /// Returns `Ok(None)` once the source has no more characters to offer
    /// and `Err(_)` if obtaining a character failed.
    fn next(&mut self) -> io::Result<Option<char>>;
}
24
25
26//------------ Scanner -------------------------------------------------------
27
28/// Reader of master file tokens.
29///
30/// A scanner reads characters from a source and converts them into tokens or
31/// errors.
32#[derive(Clone, Debug)]
33pub struct Scanner<C: CharSource> {
34    /// The underlying character source.
35    chars: C,
36
37    /// The buffer for rejected tokens.
38    ///
39    /// It will be kept short by flushing it every time we successfully read
40    /// to its end.
41    buf: Vec<Token>,
42
43    /// Index in `buf` of the start of the token currently being read.
44    start: usize,
45
46    /// Index in `buf` of the next character to be read.
47    cur: usize,
48
49    /// Human-friendly position in `chars` of `start`.
50    start_pos: Pos,
51
52    /// Human-friendly position in `chars` of `cur`.
53    cur_pos: Pos,
54
55    /// Was the start of token in a parenthesized group?
56    paren: bool,
57
58    /// Our newline mode
59    newline: NewlineMode,
60
61    /// The current origin for domain names, if any.
62    origin: Option<Dname>,
63}
64
65
66/// # Creation
67///
68impl<C: CharSource> Scanner<C> {
69    /// Creates a new scanner.
70    pub fn new(chars: C) -> Self {
71        Scanner::with_pos(chars, Pos::new())
72    }
73
74    /// Creates a new scanner using the given character source and position.
75    ///
76    /// The scanner will assume that the current position of `chars`
77    /// corresponds to the human-friendly position `pos`.
78    pub fn with_pos(chars: C, pos: Pos) -> Self {
79        Scanner {
80            chars,
81            buf: Vec::new(),
82            start: 0,
83            cur: 0,
84            start_pos: pos,
85            cur_pos: pos,
86            paren: false,
87            newline: NewlineMode::Unknown,
88            origin: None,
89        }
90    }
91}
92
93/// # Access to Origin
94///
95/// Domain names in a master file that do not end in a dot are relative to
96/// some origin. This origin is simply appened to them to form an absolute
97/// name.
98///
99/// Since domain names can appear all over the place and we don’t want to
100/// have to pass around the origin all the time, it is part of the scanner
101/// and can be set and retrieved any time.
102impl<C: CharSource> Scanner<C> {
103    /// Returns the current origin if any.
104    pub fn origin(&self) -> &Option<Dname> {
105        &self.origin
106    }
107
108    /// Sets the origin to the given value.
109    pub fn set_origin(&mut self, origin: Option<Dname>) {
110        self.origin = origin
111    }
112}
113
114
115/// # Fundamental Scanning
116///
117impl<C: CharSource> Scanner<C> {
118    /// Returns whether the scanner has reached the end of data.
119    #[allow(wrong_self_convention)] // XXX Continue changing.
120    pub fn is_eof(&mut self) -> bool {
121        match self.peek() {
122            Ok(Some(_)) => false,
123            _ => true
124        }
125    }
126
127    /// Returns the current position of the scanner.
128    pub fn pos(&self) -> Pos {
129        self.cur_pos
130    }
131
132    /// Scans a word token.
133    ///
134    /// A word is a sequence of non-special characters and escape sequences
135    /// followed by a non-empty sequence of space unless it is followed
136    /// directly by a [newline](#method.scan_newline). If successful, the
137    /// method will position at the end of the space sequence if it is
138    /// required. That is, you can scan for two subsequent word tokens
139    /// without worrying about the space between them.
140    ///
141    /// The method starts out with a `target` value and two closures. The
142    /// first closure, `symbolop`, is being fed symbols of the word one by one
143    /// and should feed them into the target. Once the word ended, the
144    /// second closure is called to convert the target into the final result.
145    /// Both can error out at any time stopping processing and leading the
146    /// scanner to revert to the beginning of the token.
147    pub fn scan_word<T, U, F, G>(&mut self, mut target: T, mut symbolop: F,
148                                 finalop: G) -> Result<U, ScanError>
149                     where F: FnMut(&mut T, Symbol)
150                                    -> Result<(), SyntaxError>,
151                           G: FnOnce(T) -> Result<U, SyntaxError> {
152        match self.peek()? {
153            Some(Token::Symbol(ch)) => {
154                if !ch.is_word_char() {
155                    return self.err(SyntaxError::Unexpected(ch))
156                }
157            }
158            Some(Token::Newline) => {
159                return self.err(SyntaxError::UnexpectedNewline)
160            }
161            None => return self.err(SyntaxError::UnexpectedEof)
162        };
163        while let Some(ch) = self.cond_read_symbol(Symbol::is_word_char)? {
164            if let Err(err) = symbolop(&mut target, ch) {
165                return self.err_cur(err)
166            }
167        }
168        let res = match finalop(target) {
169            Ok(res) => res,
170            Err(err) => return self.err(err)
171        };
172        self.skip_delimiter()?;
173        Ok(res)
174    }
175
176    /// Scans a word with Unicode text into a `String`.
177    ///
178    /// The method scans a word that consists of characters and puts these
179    /// into a `String`. Once the word ends, the caller is given a chance
180    /// to convert the value into something else via the closure `finalop`.
181    /// This closure can fail, resulting in an error and back-tracking to
182    /// the beginning of the phrase.
183    pub fn scan_string_word<U, G>(&mut self, finalop: G)
184           -> Result<U, ScanError>
185    where G: FnOnce(String) -> Result<U, SyntaxError> {
186        self.scan_word(
187            String::new(),
188            |res, ch| {
189                let ch = match ch {
190                    Symbol::Char(ch) | Symbol::SimpleEscape(ch) => ch,
191                    Symbol::DecimalEscape(ch) => ch as char,
192                };
193                res.push(ch);
194                Ok(())
195            },
196            finalop
197        )
198    }
199
200    /// Scans a quoted word.
201    ///
202    /// A quoted word starts with a double quote `"`, followed by all sorts
203    /// of characters or escape sequences until the next (unescaped) double
204    /// quote. It may contain line feeds. Like a regular word, a quoted word
205    /// is followed by a non-empty space sequence unless it is directly
206    /// followed by a [newline](#method.scan_newline). This space is not
207    /// part of the content but quietly skipped over.
208    ///
209    /// The method starts out with a `target` value and two closures. The
210    /// first closure, `symbolop`, is being fed symbols of the word one by one
211    /// and should feed them into the target. Once the word ended, the
212    /// second closure is called to convert the target into the final result.
213    /// Both can error out at any time stopping processing and leading the
214    /// scanner to revert to the beginning of the token.
215    pub fn scan_quoted<T, U, F, G>(&mut self, mut target: T, mut symbolop: F,
216                                   finalop: G) -> Result<U, ScanError>
217                       where F: FnMut(&mut T, Symbol)
218                                    -> Result<(), SyntaxError>,
219                             G: FnOnce(T) -> Result<U, SyntaxError> {
220        match self.read()? {
221            Some(Token::Symbol(Symbol::Char('"'))) => { }
222            Some(Token::Symbol(ch)) => {
223                return self.err(SyntaxError::Unexpected(ch))
224            }
225            Some(Token::Newline) => {
226                return self.err(SyntaxError::UnexpectedNewline)
227            }
228            None => return self.err(SyntaxError::UnexpectedEof)
229        }
230        loop {
231            match self.read()? {
232                Some(Token::Symbol(Symbol::Char('"'))) => break,
233                Some(Token::Symbol(ch)) => {
234                    if let Err(err) = symbolop(&mut target, ch) {
235                        return self.err(err)
236                    }
237                }
238                Some(Token::Newline) => {
239                    return self.err(SyntaxError::UnexpectedNewline)
240                }
241                None => return self.err(SyntaxError::UnexpectedEof),
242            }
243        }
244        let res = match finalop(target) {
245            Ok(res) => res,
246            Err(err) => return self.err(err)
247        };
248        self.skip_delimiter()?;
249        Ok(res)
250    }
251
252    /// Scans a phrase: a normal word or a quoted word.
253    ///
254    /// This method behaves like [scan_quoted()](#method.scan_quoted) if
255    /// the next character is a double quote or like
256    /// [scan_word()](#method.scan_word) otherwise.
257    pub fn scan_phrase<T, U, F, G>(&mut self, target: T, symbolop: F,
258                                   finalop: G) -> Result<U, ScanError>
259                       where F: FnMut(&mut T, Symbol)
260                                    -> Result<(), SyntaxError>,
261                             G: FnOnce(T) -> Result<U, SyntaxError> {
262        if let Some(Token::Symbol(Symbol::Char('"'))) = self.peek()? {
263            self.scan_quoted(target, symbolop, finalop)
264        }
265        else {
266            self.scan_word(target, symbolop, finalop)
267        }
268    }
269
270    /// Scans a phrase with byte content into a `Bytes` value.
271    ///
272    /// The method scans a phrase that consists of byte only and puts these
273    /// bytes into a `Bytes` value. Once the phrase ends, the caller is given
274    /// a chance to convert the value into something else via the closure
275    /// `finalop`. This closure can fail, resulting in an error and
276    /// back-tracking to the beginning of the phrase.
277    pub fn scan_byte_phrase<U, G>(&mut self, finalop: G)
278                                  -> Result<U, ScanError>
279                            where G: FnOnce(Bytes) -> Result<U, SyntaxError> {
280        self.scan_phrase(
281            BytesMut::new(),
282            |buf, symbol| symbol.push_to_buf(buf).map_err(Into::into),
283            |buf| finalop(buf.freeze())
284        )
285    }
286
287    /// Scans a phrase with Unicode text into a `String`.
288    ///
289    /// The method scans a phrase that consists of characters and puts these
290    /// into a `String`. Once the phrase ends, the caller is given
291    /// a chance to convert the value into something else via the closure
292    /// `finalop`. This closure can fail, resulting in an error and
293    /// back-tracking to the beginning of the phrase.
294    pub fn scan_string_phrase<U, G>(&mut self, finalop: G)
295                                    -> Result<U, ScanError>
296                              where G: FnOnce(String)
297                                              -> Result<U, SyntaxError> {
298        self.scan_phrase(
299            String::new(),
300            |res, ch| {
301                let ch = match ch {
302                    Symbol::Char(ch) | Symbol::SimpleEscape(ch) => ch,
303                    Symbol::DecimalEscape(ch) => ch as char,
304                };
305                res.push(ch);
306                Ok(())
307            },
308            finalop
309        )
310    }
311
312    /// Scans over a mandatory newline.
313    ///
314    /// A newline is either an optional comment followed by a newline sequence
315    /// or the end of file. The latter is so that a file lacking a line feed
316    /// after its last line is still parsed successfully.
317    pub fn scan_newline(&mut self) -> Result<(), ScanError> {
318        match self.read()? {
319            Some(Token::Symbol(Symbol::Char(';'))) => {
320                while let Some(ch) = self.read()? {
321                    if ch.is_newline() {
322                        break
323                    }
324                }
325                self.ok(())
326            }
327            Some(Token::Newline) => self.ok(()),
328            None => self.ok(()),
329            _ => self.err(SyntaxError::ExpectedNewline)
330        }
331    }
332
333    /// Scans over a mandatory sequence of space.
334    ///
335    /// There are two flavors of space. The simple form is any sequence
336    /// of a space character `' '` or a horizontal tab '`\t'`. However,
337    /// a parenthesis can be used to turn [newlines](#method.scan_newline)
338    /// into normal space. This method recognises parentheses and acts
339    /// accordingly.
340    pub fn scan_space(&mut self) -> Result<(), ScanError> {
341        if self.skip_space()? {
342            self.ok(())
343        }
344        else {
345            self.err(SyntaxError::ExpectedSpace)
346        }
347    }
348
349    /// Scans over an optional sequence of space.
350    pub fn scan_opt_space(&mut self) -> Result<(), ScanError> {
351        self.skip_space()?;
352        Ok(())
353    }
354
355    /// Skips over an entry.
356    ///
357    /// Keeps reading until it successfully scans a newline. The method
358    /// tries to be smart about that and considers parentheses, quotes, and
359    /// escapes but also tries its best to not fail.
360    pub fn skip_entry(&mut self) -> Result<(), ScanError> {
361        let mut quote = false;
362        loop {
363            match self.read()? {
364                None => break,
365                Some(Token::Newline) => {
366                    if !quote && !self.paren {
367                        break
368                    }
369                }
370                Some(Token::Symbol(Symbol::Char('"'))) => quote = !quote,
371                Some(Token::Symbol(Symbol::Char('('))) => {
372                    if !quote {
373                        if self.paren {
374                            return self.err(SyntaxError::NestedParentheses)
375                        }
376                        self.paren = true
377                    }
378                }
379                Some(Token::Symbol(Symbol::Char(')'))) => {
380                    if !quote {
381                        if !self.paren {
382                            return self.err(SyntaxError::Unexpected(')'.into()))
383                        }
384                        self.paren = false
385                    }
386                }
387                _ => { }
388            }
389        }
390        self.ok(())
391    }
392
393    /// Skips over the word with the content `literal`.
394    ///
395    /// The content indeed needs to be literally the literal. Escapes are
396    /// not translated before comparison and case has to be as is.
397    pub fn skip_literal(&mut self, literal: &str) -> Result<(), ScanError> {
398        self.scan_word(
399            literal,
400            |left, symbol| {
401                let first = match left.chars().next() {
402                    Some(ch) => ch,
403                    None => return Err(SyntaxError::Expected(literal.into()))
404                };
405                match symbol {
406                    Symbol::Char(ch) if ch == first => {
407                        *left = &left[ch.len_utf8()..];
408                        Ok(())
409                    }
410                    _ => Err(SyntaxError::Expected(literal.into()))
411                }
412            },
413            |left| {
414                if left.is_empty() {
415                    Ok(())
416                }
417                else {
418                    Err(SyntaxError::Expected(literal.into()))
419                }
420            }
421        )
422    }
423}
424
425/// # Complex Scanning
426///
427impl<C: CharSource> Scanner<C> {
428    /// Scans a word containing a sequence of pairs of hex digits.
429    ///
430    /// The word is returned as a `Bytes` value with each byte representing
431    /// the decoded value of one hex digit pair.
432    pub fn scan_hex_word<U, G>(&mut self, finalop: G) -> Result<U, ScanError>
433                         where G: FnOnce(Bytes) -> Result<U, SyntaxError> {
434        self.scan_word(
435            (BytesMut::new(), None), // result and optional first char.
436            |&mut (ref mut res, ref mut first), symbol | {
437                hex_symbolop(res, first, symbol)
438            },
439            |(res, first)| {
440                if let Some(ch) = first {
441                    Err(SyntaxError::Unexpected(
442                            Symbol::Char(::std::char::from_digit(ch, 16)
443                                                                 .unwrap())))
444                }
445                else {
446                    finalop(res.freeze())
447                }
448            }
449        )
450    }
451
452    pub fn scan_hex_words<U, G>(&mut self, finalop: G) -> Result<U, ScanError>
453    where G: FnOnce(Bytes) -> Result<U, SyntaxError> {
454        let start_pos = self.pos();
455        let mut buf = BytesMut::new();
456        let mut first = true;
457        loop {
458            let res = self.scan_word(
459                (&mut buf, None),
460                |&mut (ref mut buf, ref mut first), symbol| {
461                    hex_symbolop(buf, first, symbol)
462                },
463                |(_, first)| {
464                    if let Some(ch) = first {
465                        Err(SyntaxError::Unexpected(
466                            Symbol::Char(
467                                ::std::char::from_digit(ch, 16).unwrap()
468                            )
469                        ))
470                    }
471                    else {
472                        Ok(())
473                    }
474                }
475            );
476            if first {
477                if let Err(err) = res {
478                    return Err(err)
479                }
480                first = false;
481            }
482            else if res.is_err() {
483                break
484            }
485        }
486        finalop(buf.freeze()).map_err(|err| (err, start_pos).into())
487    }
488
489    /// Scans a phrase containing base32hex encoded data.
490    ///
491    /// In particular, this decodes the “base32hex” decoding definied in
492    /// RFC 4648 without padding.
493    pub fn scan_base32hex_phrase<U, G>(
494        &mut self,
495        finalop: G
496    ) -> Result<U, ScanError>
497    where G: FnOnce(Bytes) -> Result<U, SyntaxError> {
498        self.scan_phrase(
499            base32::Decoder::new_hex(),
500            |decoder, symbol| {
501                decoder.push(symbol.into_char()?)
502                       .map_err(SyntaxError::content)
503            },
504            |decoder| {
505                finalop(decoder.finalize().map_err(SyntaxError::content)?)
506            }
507        )
508    }
509
510    /// Scans a sequence of phrases containing base64 encoded data.
511    pub fn scan_base64_phrases<U, G>(
512        &mut self,
513        finalop: G
514    ) -> Result<U, ScanError>
515    where G: FnOnce(Bytes) -> Result<U, SyntaxError> {
516        let start_pos = self.pos();
517        let mut decoder = base64::Decoder::new();
518        let mut first = true;
519        loop {
520            let res = self.scan_phrase(
521                &mut decoder, 
522                |decoder, symbol| {
523                    decoder.push(symbol.into_char()?)
524                           .map_err(SyntaxError::content)
525                },
526                Ok
527            );
528            if first {
529                if let Err(err) = res {
530                    return Err(err)
531                }
532                first = false;
533            }
534            else if res.is_err() {
535                break
536            }
537        }
538        let bytes = decoder.finalize().map_err(|err| {
539            (SyntaxError::content(err), self.pos())
540        })?;
541        finalop(bytes).map_err(|err| (err, start_pos).into())
542    }
543}
544
545fn hex_symbolop(
546    buf: &mut BytesMut,
547    first: &mut Option<u32>,
548    symbol: Symbol
549) -> Result<(), SyntaxError> {
550    let ch = match symbol {
551        Symbol::Char(ch) => {
552            match ch.to_digit(16) {
553                Some(ch) => ch,
554                _ => return Err(SyntaxError::Unexpected(symbol))
555            }
556        }
557        _ => return Err(SyntaxError::Unexpected(symbol))
558    };
559    if let Some(ch1) = first.take() {
560        if buf.remaining_mut() == 0 {
561            buf.reserve(1)
562        }
563        buf.put_u8((ch1 as u8) << 4 | (ch as u8));
564    }
565    else {
566        *first = Some(ch)
567    }
568    Ok(())
569}
570
571
572/// # Fundamental Reading, Processing, and Back-tracking
573///
574impl<C: CharSource> Scanner<C> {
575    /// Reads a char from the source.
576    ///
577    /// This function is here to for error conversion only and updating the
578    /// human-friendly position.
579    fn chars_next(&mut self) -> Result<Option<char>, ScanError> {
580        self.chars.next().map_err(|err| {
581            let mut pos = self.cur_pos;
582            for ch in &self.buf {
583                pos.update(*ch)
584            }
585            ScanError::Source(err, pos)
586        })
587    }
588
589    /// Tries to read at least one additional character into the buffer.
590    ///
591    /// Returns whether that succeeded.
592    fn source_token(&mut self) -> Result<bool, ScanError> {
593        let ch = match self.chars_next()? {
594            Some(ch) => ch,
595            None => return Ok(false),
596        };
597        if ch == '\\' {
598            self.source_escape()
599        }
600        else {
601            self.source_normal(ch)
602        }
603    }
604
605    /// Tries to read and return the content of an escape sequence.
606    fn source_escape(&mut self) -> Result<bool, ScanError> {
607        let ch = match self.chars_next()? {
608            Some(ch) if ch.is_digit(10) => {
609                let ch = ch.to_digit(10).unwrap() * 100;
610                let ch2 = match self.chars_next()? {
611                    Some(ch) => match ch.to_digit(10) {
612                        Some(ch) => ch * 10,
613                        None => {
614                            return self.err_cur(SyntaxError::IllegalEscape)
615                        }
616                    }
617                    None => {
618                        return self.err_cur(SyntaxError::UnexpectedEof)
619                    }
620                };
621                let ch3 = match self.chars_next()? {
622                    Some(ch)  => match ch.to_digit(10) {
623                        Some(ch) => ch,
624                        None => {
625                            return self.err_cur(SyntaxError::IllegalEscape)
626                        }
627                    }
628                    None => {
629                        return self.err_cur(SyntaxError::UnexpectedEof)
630                    }
631                };
632                let res = ch + ch2 + ch3;
633                if res > 255 {
634                    return self.err_cur(SyntaxError::IllegalEscape)
635                }
636                else {
637                    Symbol::DecimalEscape(res as u8)
638                }
639            }
640            Some(ch) => Symbol::SimpleEscape(ch),
641            None => {
642                return self.err_cur(SyntaxError::UnexpectedEof)
643            }
644        };
645        self.buf.push(Token::Symbol(ch));
646        Ok(true)
647    }
648
649    /// Tries to source a normal character.
650    fn source_normal(&mut self, ch: char) -> Result<bool, ScanError> {
651        match self.newline {
652            NewlineMode::Single(sep) => {
653                if ch == sep {
654                    self.buf.push(Token::Newline)
655                }
656                else {
657                    self.buf.push(Token::Symbol(Symbol::Char(ch)))
658                }
659                Ok(true)
660            }
661            NewlineMode::Double(first, second) => {
662                if ch != first {
663                    self.buf.push(Token::Symbol(Symbol::Char(ch)));
664                    Ok(true)
665                }
666                else {
667                    match self.chars_next()? {
668                        Some(ch) if ch == second => {
669                            self.buf.push(Token::Newline);
670                            Ok(true)
671                        }
672                        Some(ch) => {
673                            self.buf.push(Token::Symbol(Symbol::Char(first)));
674                            self.buf.push(Token::Symbol(Symbol::Char(ch)));
675                            Ok(true)
676                        }
677                        None => {
678                            // Half a newline is still EOF.
679                            Ok(false)
680                        }
681                    }
682                }
683            }
684            NewlineMode::Unknown => {
685                if ch != '\r' && ch != '\n' {
686                    self.buf.push(Token::Symbol(Symbol::Char(ch)));
687                    Ok(true)
688                }
689                else if let Some(second) = self.chars_next()? {
690                    match (ch, second) {
691                        ('\r', '\n') | ('\n', '\r') => {
692                            self.newline = NewlineMode::Double(ch, second);
693                            self.buf.push(Token::Newline);
694                        }
695                        ('\r', '\r') | ('\n', '\n')  => {
696                            self.newline = NewlineMode::Single(ch);
697                            self.buf.push(Token::Newline);
698                            self.buf.push(Token::Newline);
699                        }
700                        ('\r', _) | ('\n', _) => {
701                            self.newline = NewlineMode::Single(ch);
702                            self.buf.push(Token::Newline);
703                            self.buf.push(Token::Symbol(Symbol::Char(second)));
704                        }
705                        _ => {
706                            self.buf.push(Token::Symbol(Symbol::Char(ch)));
707                            self.buf.push(Token::Symbol(Symbol::Char(second)));
708                        }
709                    }
710                    Ok(true)
711                }
712                else {
713                    if ch == '\r' || ch == '\n' {
714                        self.buf.push(Token::Newline);
715                    }
716                    else {
717                        self.buf.push(Token::Symbol(Symbol::Char(ch)))
718                    }
719                    Ok(true)
720                }
721            }
722        }
723    }
724    
725    /// Tries to peek at the next symbol.
726    ///
727    /// On success, returns the symbol. It the end of the
728    /// underlying source is reached, returns `Ok(None)`. If reading on the
729    /// underlying source results in an error, returns that.
730    fn peek(&mut self) -> Result<Option<Token>, ScanError> {
731        if self.buf.len() == self.cur && !self.source_token()? {
732            return Ok(None)
733        }
734        Ok(Some(self.buf[self.cur]))
735    }
736
737    /// Tries to read a symbol.
738    ///
739    /// On success, returns the `Ok(Some(_))` character. It the end of the
740    /// underlying source is reached, returns `Ok(None)`. If reading on the
741    /// underlying source results in an error, returns that.
742    fn read(&mut self) -> Result<Option<Token>, ScanError> {
743        self.peek().map(|res| match res {
744            Some(ch) => {
745                self.cur += 1;
746                self.cur_pos.update(ch);
747                Some(ch)
748            }
749            None => None
750        })
751    }
752
753    /// Skip the first token.
754    ///
755    /// Only ever call this if you called `peek` before and it did return
756    /// `Some(ch)`.
757    ///
758    /// This is an optimization.
759    fn skip(&mut self, ch: Token) {
760        self.cur += 1;
761        self.cur_pos.update(ch)
762    }
763
764
765    /// Progresses the scanner to the current position and returns `t`.
766    fn ok<T>(&mut self, t: T) -> Result<T, ScanError> {
767        if self.buf.len() == self.cur {
768            self.buf.clear();
769            self.start = 0;
770            self.cur = 0;
771        } else {
772            self.start = self.cur;
773        }
774        self.start_pos = self.cur_pos;
775        Ok(t)
776    }
777
778    /// Backtracks to the last token start and reports an error there.
779    ///
780    /// Returns a syntax error with the given error value and the position
781    /// of the token start.
782    ///
783    /// The method is generic over whatever type `T` so it can be used to
784    /// create whatever particular result is needed.
785    fn err<T>(&mut self, err: SyntaxError) -> Result<T, ScanError> {
786        let pos = self.start_pos;
787        self.err_at(err, pos)
788    }
789
790    fn err_cur<T>(&mut self, err: SyntaxError) -> Result<T, ScanError> {
791        let pos = self.cur_pos;
792        self.err_at(err, pos)
793    }
794
795    /// Reports an error at current position and then backtracks.
796    fn err_at<T>(&mut self, err: SyntaxError, pos: Pos)
797                 -> Result<T, ScanError> {
798        self.cur = self.start;
799        self.cur_pos = self.start_pos;
800        Err(ScanError::Syntax(err, pos))
801    }
802}
803
804/// # More Complex Internal Reading
805///
806impl<C: CharSource> Scanner<C> {
807    /// Reads a symbol if it is accepted by a closure.
808    ///
809    /// The symbol is passed to the closure which should return `true` if
810    /// it accepts it in which case the method returns `Ok(Some(_))`. If
811    /// the closure returns `false` or the end of file is reached, `Ok(None)`
812    /// is returned.
813    ///
814    /// The method does not progress or backtrack.
815    fn cond_read<F>(&mut self, f: F)
816                         -> Result<Option<Token>, ScanError>
817                      where F: FnOnce(Token) -> bool {
818        match self.peek()? {
819            Some(ch) if f(ch) => self.read(),
820            _ => Ok(None)
821        }
822    }
823
824    fn cond_read_symbol<F>(&mut self, f: F)
825                           -> Result<Option<Symbol>, ScanError>
826                        where F: FnOnce(Symbol) -> bool {
827        match self.peek()? {
828            Some(Token::Symbol(ch)) if f(ch) => {
829                self.skip(Token::Symbol(ch));
830                Ok(Some(ch))
831            }
832            _ => Ok(None)
833        }
834    }
835
836    /// Skips over delimiting space.
837    ///
838    /// A delimiter is a non-empty sequence of space (which means that
839    /// something like `"foo(bar"` qualifies as the two words `"foo"` and
840    /// `"bar".) or if the following byte is the beginning of a newline or
841    /// if the scanner has reached end-of-file.
842    ///
843    /// Progresses the scanner on success, otherwise backtracks with an
844    /// ‘unexpected space’ error.
845    fn skip_delimiter(&mut self) -> Result<(), ScanError> {
846        if self.skip_space()? {
847            self.ok(())
848        }
849        else {
850            match self.peek()? {
851                Some(ch) if ch.is_newline_ahead() => self.ok(()),
852                None => self.ok(()),
853                _ => self.err(SyntaxError::ExpectedSpace)
854            }
855        }
856    }
857
858    /// Skips over space.
859    ///
860    /// Normally, space is ordinary white space (`' '` and `'\t'`).
861    /// However, an opening parenthesis can be used to make newlines appear
862    /// as space, too. A closing parenthesis resets this behaviour.
863    ///
864    /// This method cleverly hides all of this and simply walks over whatever
865    /// is space. It returns whether there was at least one character of
866    /// space.  It does not progress the scanner but backtracks on error.
867    fn skip_space(&mut self) -> Result<bool, ScanError> {
868        let mut res = false;
869        loop {
870            if self.paren {
871                match self.cond_read(Token::is_paren_space)? {
872                    None => break,
873                    Some(Token::Symbol(Symbol::Char('('))) => {
874                        let pos = self.cur_pos.prev();
875                        return self.err_at(SyntaxError::NestedParentheses,
876                                           pos)
877                    }
878                    Some(Token::Symbol(Symbol::Char(')'))) => {
879                        self.paren = false;
880                    }
881                    Some(Token::Symbol(Symbol::Char(';'))) => {
882                        while let Some(ch) = self.read()? {
883                            if ch.is_newline() {
884                                break
885                            }
886                        }
887                    }
888                    _ => { }
889                }
890            }
891            else {
892                match self.cond_read(Token::is_non_paren_space)? {
893                    None => break,
894                    Some(Token::Symbol(Symbol::Char('('))) => {
895                        self.paren = true;
896                    }
897                    Some(Token::Symbol(Symbol::Char(')'))) => {
898                        let pos = self.cur_pos.prev();
899                        return self.err_at(SyntaxError::Unexpected(
900                                                             ')'.into()), pos)
901                    }
902                    _ => { }
903                }
904            }
905            res = true;
906        }
907        Ok(res)
908    }
909}
910
911
912//------------ Scan ----------------------------------------------------------
913
/// A type that can be scanned from a master file.
pub trait Scan: Sized {
    /// Scans a value from a master file.
    ///
    /// Reads from `scanner` and returns either the scanned value or a
    /// `ScanError` if scanning failed.
    fn scan<C: CharSource>(scanner: &mut Scanner<C>)
                           -> Result<Self, ScanError>;
}
920
921impl Scan for u32 {
922    fn scan<C: CharSource>(scanner: &mut Scanner<C>)
923                           -> Result<Self, ScanError> {
924        scanner.scan_phrase(
925            0u32,
926            |res, symbol| {
927                let ch = match symbol {
928                    Symbol::Char(ch) => {
929                        if let Some(value) = ch.to_digit(10) {
930                            value
931                        }
932                        else {
933                            return Err(SyntaxError::Unexpected(symbol))
934                        }
935                    }
936                    _ => return Err(SyntaxError::Unexpected(symbol))
937                };
938                *res = match res.checked_mul(10) {
939                    Some(res) => res,
940                    None => return Err(SyntaxError::IllegalInteger)
941                };
942                *res = match res.checked_add(ch) {
943                    Some(res) => res,
944                    None => return Err(SyntaxError::IllegalInteger)
945                };
946                Ok(())
947            },
948            Ok
949        )
950    }
951}
952
953impl Scan for u16 {
954    fn scan<C: CharSource>(scanner: &mut Scanner<C>)
955                           -> Result<Self, ScanError> {
956        scanner.scan_phrase(
957            0u16,
958            |res, symbol| {
959                let ch = match symbol {
960                    Symbol::Char(ch) => {
961                        if let Some(value) = ch.to_digit(10) {
962                            value as u16
963                        }
964                        else {
965                            return Err(SyntaxError::Unexpected(symbol))
966                        }
967                    }
968                    _ => return Err(SyntaxError::Unexpected(symbol))
969                };
970                *res = match res.checked_mul(10) {
971                    Some(res) => res,
972                    None => return Err(SyntaxError::IllegalInteger)
973                };
974                *res = match res.checked_add(ch) {
975                    Some(res) => res,
976                    None => return Err(SyntaxError::IllegalInteger)
977                };
978                Ok(())
979            },
980            Ok
981        )
982    }
983}
984
985
986impl Scan for u8 {
987    fn scan<C: CharSource>(scanner: &mut Scanner<C>)
988                           -> Result<Self, ScanError> {
989        scanner.scan_phrase(
990            0u8,
991            |res, symbol| {
992                let ch = match symbol {
993                    Symbol::Char(ch) => {
994                        if let Some(value) = ch.to_digit(10) {
995                            value as u8
996                        }
997                        else {
998                            return Err(SyntaxError::Unexpected(symbol))
999                        }
1000                    }
1001                    _ => return Err(SyntaxError::Unexpected(symbol))
1002                };
1003                *res = match res.checked_mul(10) {
1004                    Some(res) => res,
1005                    None => return Err(SyntaxError::IllegalInteger)
1006                };
1007                *res = match res.checked_add(ch) {
1008                    Some(res) => res,
1009                    None => return Err(SyntaxError::IllegalInteger)
1010                };
1011                Ok(())
1012            },
1013            Ok
1014        )
1015    }
1016}
1017
1018
1019//------------ Symbol --------------------------------------------------------
1020
/// The master file representation of a single character.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Symbol {
    /// An unescaped Unicode character.
    Char(char),

    /// A character escaped by simply being preceded by a backslash.
    SimpleEscape(char),

    /// A byte value given through the decimal escape sequence, i.e., a
    /// backslash followed by three decimal digits.
    DecimalEscape(u8),
}
1033
1034impl Symbol {
1035    /// Reads a symbol from a character source.
1036    ///
1037    /// Returns the next symbol in the source, `Ok(None)` if the source has
1038    /// been exhausted, or an error if there wasn’t a valid symbol.
1039    pub fn from_chars<C>(chars: C) -> Result<Option<Self>, SymbolError>
1040                      where C: IntoIterator<Item=char> {
1041        let mut chars = chars.into_iter();
1042        let ch = match chars.next() {
1043            Some(ch) => ch,
1044            None => return Ok(None),
1045        };
1046        if ch != '\\' {
1047            return Ok(Some(Symbol::Char(ch)))
1048        }
1049        match chars.next() {
1050            Some(ch) if ch.is_digit(10) => {
1051                let ch = ch.to_digit(10).unwrap() * 100;
1052                let ch2 = match chars.next() {
1053                    Some(ch) => match ch.to_digit(10) {
1054                        Some(ch) => ch * 10,
1055                        None => return Err(SymbolError::BadEscape)
1056                    }
1057                    None => return Err(SymbolError::ShortInput)
1058                };
1059                let ch3 = match chars.next() {
1060                    Some(ch)  => match ch.to_digit(10) {
1061                        Some(ch) => ch,
1062                        None => return Err(SymbolError::BadEscape)
1063                    }
1064                    None => return Err(SymbolError::ShortInput)
1065                };
1066                let res = ch + ch2 + ch3;
1067                if res > 255 {
1068                    return Err(SymbolError::BadEscape)
1069                }
1070                Ok(Some(Symbol::DecimalEscape(res as u8)))
1071            }
1072            Some(ch) => Ok(Some(Symbol::SimpleEscape(ch))),
1073            None => Err(SymbolError::ShortInput)
1074        }
1075    }
1076
1077    /// Provides the best symbol for a byte.
1078    ///
1079    /// The function will use simple escape sequences for spaces, quotes,
1080    /// backslashs, and semicolons. It will leave all other printable ASCII
1081    /// characters unescaped and decimal escape all remaining byte value.
1082    pub fn from_byte(ch: u8) -> Self {
1083        if ch == b' ' || ch == b'"' || ch == b'\\' || ch == b';' {
1084            Symbol::SimpleEscape(ch as char)
1085        }
1086        else if ch < 0x20 || ch > 0x7E {
1087            Symbol::DecimalEscape(ch)
1088        }
1089        else {
1090            Symbol::Char(ch as char)
1091        }
1092    }
1093
1094    /// Converts the symbol into a byte if it represents one.
1095    ///
1096    /// Both domain names and character strings operate on bytes instead of
1097    /// (Unicode) characters. These bytes can be represented by printable
1098    /// ASCII characters (that is, U+0020 to U+007E), both plain or through
1099    /// a simple escape, or by a decimal escape.
1100    ///
1101    /// This method returns such a byte or an error otherwise. Note that it
1102    /// will succeed for an ASCII space character U+0020 which may be used
1103    /// as a word separator in some cases.
1104    pub fn into_byte(self) -> Result<u8, BadSymbol> {
1105        match self {
1106            Symbol::Char(ch) | Symbol::SimpleEscape(ch) => {
1107                if ch.is_ascii() && ch >= '\u{20}' && ch <= '\u{7E}' {
1108                    Ok(ch as u8)
1109                }
1110                else {
1111                    Err(BadSymbol(self))
1112                }
1113            }
1114            Symbol::DecimalEscape(ch) => Ok(ch),
1115        }
1116    }
1117
1118    /// Converts the symbol into a `char`.
1119    pub fn into_char(self) -> Result<char, BadSymbol> {
1120        match self {
1121            Symbol::Char(ch) | Symbol::SimpleEscape(ch) => Ok(ch),
1122            Symbol::DecimalEscape(_) => Err(BadSymbol(self))
1123        }
1124    }
1125
1126    /// Converts the symbol representing a digit into its integer value.
1127    pub fn into_digit(self, base: u32) -> Result<u32, SyntaxError> {
1128        if let Symbol::Char(ch) = self {
1129            match ch.to_digit(base) {
1130                Some(ch) => Ok(ch),
1131                None => Err(SyntaxError::Unexpected(self))
1132            }
1133        }
1134        else {
1135            Err(SyntaxError::Unexpected(self))
1136        }
1137    }
1138
1139    /// Pushes a symbol that is a byte to the end of a byte buffer.
1140    ///
1141    /// If the symbol is a byte as per the rules described in `into_byte`,
1142    /// it will be pushed to the end of `buf`, reserving additional space
1143    /// if there isn’t enough space remaining.
1144    pub fn push_to_buf(self, buf: &mut BytesMut) -> Result<(), BadSymbol> {
1145        self.into_byte().map(|ch| {
1146            if buf.remaining_mut() == 0 {
1147                buf.reserve(1);
1148            }
1149            buf.put_u8(ch)
1150        })
1151    }
1152
1153    /// Returns whether the symbol can occur as part of a word.
1154    pub fn is_word_char(self) -> bool {
1155        match self {
1156            Symbol::Char(ch) => {
1157                ch != ' ' && ch != '\t' && ch != '(' && ch != ')' &&
1158                ch != ';' && ch != '"'
1159            }
1160            _ => true
1161        }
1162    }
1163}
1164
1165
1166//--- From
1167
1168impl From<char> for Symbol {
1169    fn from(ch: char) -> Symbol {
1170        Symbol::Char(ch)
1171    }
1172}
1173
1174
1175//--- Display
1176
1177impl fmt::Display for Symbol {
1178    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1179        match *self {
1180            Symbol::Char(ch) => write!(f, "{}", ch),
1181            Symbol::SimpleEscape(ch) => write!(f, "\\{}", ch),
1182            Symbol::DecimalEscape(ch) => write!(f, "\\{:03}", ch),
1183        }
1184    }
1185}
1186
1187
1188//------------ Token ---------------------------------------------------------
1189
/// A single token parsed from a master file.
///
/// A token is either a symbol or a line break.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Token {
    /// A regular symbol.
    Symbol(Symbol),

    /// A new line.
    ///
    /// This needs special treatment because of the varying encoding of
    /// newlines on different systems.
    Newline,
}
1202
1203impl Token {
1204    /// Checks for space-worthy character outside a parenthesized group.
1205    ///
1206    /// These are horizontal white space plus opening and closing parentheses
1207    /// which need special treatment.
1208    fn is_non_paren_space(self) -> bool {
1209        match self {
1210            Token::Symbol(Symbol::Char(ch)) => {
1211                ch == ' ' || ch == '\t' || ch == '(' || ch == ')'
1212            }
1213            _ => false
1214        }
1215    }
1216
1217    /// Checks for space-worthy character inside a parenthesized group.
1218    ///
1219    /// These are all from `is_non_paren_space()` plus a semicolon and line
1220    /// break characters.
1221    fn is_paren_space(self) -> bool {
1222        match self {
1223            Token::Symbol(Symbol::Char(ch)) => {
1224                ch == ' ' || ch == '\t' || ch == '(' || ch == ')' ||
1225                ch == ';'
1226            }
1227            Token::Newline => true,
1228            _ => false
1229        }
1230    }
1231
1232    /// Returns whether the token is a newline.
1233    fn is_newline(self) -> bool {
1234        match self {
1235            Token::Newline => true,
1236            _ => false,
1237        }
1238    }
1239
1240    /// Returns whether the token starts a newline sequence.
1241    ///
1242    /// This happens if the token is either a newline itself or an unescaped
1243    /// semicolon which starts a comment until line’s end.
1244    fn is_newline_ahead(self) -> bool {
1245        match self {
1246            Token::Symbol(Symbol::Char(';')) => true,
1247            Token::Newline => true,
1248            _ => false,
1249        }
1250    }
1251}
1252
1253
1254//------------ NewlineMode ---------------------------------------------------
1255
/// The newline mode used by a file.
///
/// Files can use different characters or character combinations to signal a
/// line break. Since line breaks are significant in master files, we need to
/// use the right mode.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum NewlineMode {
    /// Each occurrence of the contained character is a newline.
    Single(char),

    /// Each combination of the two chars is a newline.
    Double(char, char),

    /// We don’t know yet.
    Unknown,
}
1272
1273
1274//------------ SymbolError ---------------------------------------------------
1275
/// An error happened when reading a symbol.
#[derive(Clone, Copy, Debug, Eq, Fail, PartialEq)]
pub enum SymbolError {
    /// The escape sequence was not well-formed.
    ///
    /// `Symbol::from_chars` produces this for a decimal escape that contains
    /// a non-digit or whose value is greater than 255.
    #[fail(display="illegal escape sequence")]
    BadEscape,

    /// The input ended in the middle of an escape sequence.
    #[fail(display="unexpected end of input")]
    ShortInput
}
1285
1286
1287//------------ BadSymbol -----------------------------------------------------
1288
/// A symbol of unexpected value was encountered.
///
/// The offending symbol is available as the public field.
#[derive(Clone, Copy, Debug, Eq, Fail, PartialEq)]
#[fail(display="bad symbol '{}'", _0)]
pub struct BadSymbol(pub Symbol);
1293
1294
1295//------------ SyntaxError ---------------------------------------------------
1296
/// A syntax error happened while scanning master data.
///
/// The message shown for each variant is given by its `fail` attribute.
#[derive(Debug, Fail)]
pub enum SyntaxError {
    /// Something specific was expected; the content describes what.
    #[fail(display="expected '{}'", _0)]
    Expected(String),

    /// A new line was expected.
    #[fail(display="expected a new line")]
    ExpectedNewline,

    /// White space was expected.
    #[fail(display="expected white space")]
    ExpectedSpace,

    /// An escape sequence was invalid.
    #[fail(display="invalid escape sequence")]
    IllegalEscape,

    /// An integer value was invalid, e.g., too large for its type.
    #[fail(display="invalid integer value")]
    IllegalInteger, // TODO Add kind

    /// An address could not be parsed.
    #[fail(display="invalid address: {}", _0)]
    IllegalAddr(AddrParseError),

    /// A domain name was illegal.
    #[fail(display="illegal domain name: {}", _0)]
    IllegalName(name::FromStrError),

    /// A character string was too long.
    #[fail(display="character string too long")]
    LongCharStr,

    /// A hex string had an odd number of characters.
    #[fail(display="hex string with an odd number of characters")]
    UnevenHexString,

    /// More data was given than announced by the length byte.
    #[fail(display="more data given than in the length byte")]
    LongGenericData,

    /// A parenthesis was opened inside a parenthesized group.
    #[fail(display="nested parentheses")]
    NestedParentheses,

    /// The TTL was omitted but there is no default TTL.
    #[fail(display="omitted TTL but no default TTL given")]
    NoDefaultTtl,

    /// The class was omitted but there is no previous class.
    #[fail(display="omitted class but no previous class given")]
    NoLastClass,

    /// The owner was omitted but there is no previous owner.
    #[fail(display="omitted owner but no previous owner given")]
    NoLastOwner,

    /// The owner `@` was used without a preceding `$ORIGIN`.
    #[fail(display="owner @ without preceding $ORIGIN")]
    NoOrigin,

    /// A domain name was relative.
    #[fail(display="relative domain name")]
    RelativeName,

    /// The contained symbol was not expected.
    #[fail(display="unexpected '{}'", _0)]
    Unexpected(Symbol),

    /// A newline was not expected.
    #[fail(display="unexpected newline")]
    UnexpectedNewline,

    /// The end of file was not expected.
    #[fail(display="unexpected end of file")]
    UnexpectedEof,

    /// A mnemonic was not known.
    #[fail(display="unknown mnemonic")]
    UnknownMnemonic,

    /// Used when converting some other content fails.
    #[fail(display="{}", _0)]
    Content(Box<Fail>),
}
1364
1365impl SyntaxError {
1366    pub fn content<E: Fail>(err: E) -> Self {
1367        SyntaxError::Content(Box::new(err))
1368    }
1369}
1370
1371
1372impl From<BadSymbol> for SyntaxError {
1373    fn from(err: BadSymbol) -> SyntaxError {
1374        SyntaxError::Unexpected(err.0)
1375    }
1376}
1377
1378impl From<AddrParseError> for SyntaxError {
1379    fn from(err: AddrParseError) -> SyntaxError {
1380        SyntaxError::IllegalAddr(err)
1381    }
1382}
1383
1384impl From<name::FromStrError> for SyntaxError {
1385    fn from(err: name::FromStrError) -> SyntaxError {
1386        SyntaxError::IllegalName(err)
1387    }
1388}
1389
1390impl From<name::PushNameError> for SyntaxError {
1391    fn from(err: name::PushNameError) -> SyntaxError {
1392        SyntaxError::from(name::FromStrError::from(err))
1393    }
1394}
1395
1396
1397//------------ ScanError -----------------------------------------------------
1398
/// An error happened while scanning master data.
///
/// Each variant carries the position the error is reported for.
#[derive(Debug)]
pub enum ScanError {
    /// An IO error together with a position.
    Source(io::Error, Pos),

    /// A syntax error together with a position.
    Syntax(SyntaxError, Pos),
}
1405
1406impl From<(io::Error, Pos)> for ScanError {
1407    fn from(err: (io::Error, Pos)) -> ScanError {
1408        ScanError::Source(err.0, err.1)
1409    }
1410}
1411
1412impl From<(SyntaxError, Pos)> for ScanError {
1413    fn from(err: (SyntaxError, Pos)) -> ScanError {
1414        ScanError::Syntax(err.0, err.1)
1415    }
1416}
1417
1418
1419//------------ Pos -----------------------------------------------------------
1420
/// The human-friendly position in a reader.
///
/// A position created via `Pos::new` starts at line 1, column 1. Note that
/// the derived `Default` implementation sets both values to zero instead.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct Pos {
    /// The line number.
    line: usize,

    /// The column number within the line.
    col: usize
}
1427
1428impl Pos {
1429    pub fn new() -> Pos {
1430        Pos { line: 1, col: 1 }
1431    }
1432
1433    pub fn line(&self) -> usize { self.line }
1434    pub fn col(&self) -> usize { self.col }
1435
1436    pub fn update(&mut self, ch: Token) {
1437        match ch {
1438            Token::Symbol(Symbol::Char(_)) => self.col += 1,
1439            Token::Symbol(Symbol::SimpleEscape(_)) => self.col += 2,
1440            Token::Symbol(Symbol::DecimalEscape(_)) => self.col += 4,
1441            Token::Newline => { self.line += 1; self.col = 1 }
1442        }
1443    }
1444
1445    pub fn prev(&self) -> Pos {
1446        Pos { line: self.line,
1447              col: if self.col <= 1 { 1 } else { self.col - 1 }
1448        }
1449    }
1450}
1451
1452impl From<(usize, usize)> for Pos {
1453    fn from(src: (usize, usize)) -> Pos {
1454        Pos { line: src.0, col: src.1 }
1455    }
1456}
1457
1458impl PartialEq<(usize, usize)> for Pos {
1459    fn eq(&self, other: &(usize, usize)) -> bool {
1460        self.line == other.0 && self.col == other.1
1461    }
1462}
1463
1464
1465//============ Test ==========================================================
1466
#[cfg(test)]
mod test {
    use super::*;

    /// Scanning a word returns the first word of the source.
    #[test]
    fn scan_word() {
        let source = "one two three\nfour";
        let mut scanner = Scanner::new(source);
        let word = scanner.scan_string_word(Ok).unwrap();
        assert_eq!(word, "one");
    }
}