//! Scanning master file tokens.
use crate::base::name;
use crate::base::name::Dname;
use crate::base::net::AddrParseError;
use crate::base::str::{BadSymbol, Symbol};
use crate::utils::{base32, base64};
use bytes::{BufMut, Bytes, BytesMut};
use std::boxed::Box;
use std::string::String;
use std::vec::Vec;
use std::{error, fmt, io};
//------------ CharSource ----------------------------------------------------
/// A source of master file characters.
///
/// This is very similar to an iterator except that `next`’s return value has
/// the result outside for easier error handling.
pub trait CharSource {
/// Provides the next character in the source.
///
/// If the source runs out of characters, returns `Ok(None)`.
fn next(&mut self) -> Result<Option<char>, io::Error>;
}
//------------ Scanner -------------------------------------------------------
/// Reader of master file tokens.
///
/// A scanner reads characters from a source and converts them into tokens or
/// errors.
#[cfg(feature = "bytes")]
#[derive(Clone, Debug)]
pub struct Scanner<C: CharSource> {
/// The underlying character source.
chars: C,
/// The buffer for rejected tokens.
///
/// It will be kept short by flushing it every time we successfully read
/// to its end.
buf: Vec<Token>,
/// Index in `buf` of the start of the token currently being read.
start: usize,
/// Index in `buf` of the next character to be read.
cur: usize,
/// Human-friendly position in `chars` of `start`.
start_pos: Pos,
/// Human-friendly position in `chars` of `cur`.
cur_pos: Pos,
/// Was the start of token in a parenthesized group?
paren: bool,
/// Our newline mode
newline: NewlineMode,
/// The current origin for domain names, if any.
origin: Option<Dname<Bytes>>,
}
/// # Creation
///
#[cfg(feature = "bytes")]
impl<C: CharSource> Scanner<C> {
/// Creates a new scanner.
pub fn new(chars: C) -> Self {
Scanner::with_pos(chars, Pos::new())
}
/// Creates a new scanner using the given character source and position.
///
/// The scanner will assume that the current position of `chars`
/// corresponds to the human-friendly position `pos`.
pub fn with_pos(chars: C, pos: Pos) -> Self {
Scanner {
chars,
buf: Vec::new(),
start: 0,
cur: 0,
start_pos: pos,
cur_pos: pos,
paren: false,
newline: NewlineMode::Unknown,
origin: None,
}
}
}
/// # Access to Origin
///
/// Domain names in a master file that do not end in a dot are relative to
/// some origin. This origin is simply appened to them to form an absolute
/// name.
///
/// Since domain names can appear all over the place and we don’t want to
/// have to pass around the origin all the time, it is part of the scanner
/// and can be set and retrieved any time.
#[cfg(feature = "bytes")]
impl<C: CharSource> Scanner<C> {
/// Returns the current origin if any.
pub fn origin(&self) -> &Option<Dname<Bytes>> {
&self.origin
}
/// Sets the origin to the given value.
pub fn set_origin(&mut self, origin: Option<Dname<Bytes>>) {
self.origin = origin
}
}
/// # Fundamental Scanning
///
#[cfg(feature = "bytes")]
impl<C: CharSource> Scanner<C> {
/// Returns whether the scanner has reached the end of data.
pub fn eof_reached(&mut self) -> bool {
!matches!(self.peek(), Ok(Some(_)))
}
/// Returns the current position of the scanner.
pub fn pos(&self) -> Pos {
self.cur_pos
}
/// Scans a word token.
///
/// A word is a sequence of non-special characters and escape sequences
/// followed by a non-empty sequence of space unless it is followed
/// directly by a [newline](#method.scan_newline). If successful, the
/// method will position at the end of the space sequence if it is
/// required. That is, you can scan for two subsequent word tokens
/// without worrying about the space between them.
///
/// The method starts out with a `target` value and two closures. The
/// first closure, `symbolop`, is being fed symbols of the word one by one
/// and should feed them into the target. Once the word ended, the
/// second closure is called to convert the target into the final result.
/// Both can error out at any time stopping processing and leading the
/// scanner to revert to the beginning of the token.
pub fn scan_word<T, U, F, G>(
&mut self,
mut target: T,
mut symbolop: F,
finalop: G,
) -> Result<U, ScanError>
where
F: FnMut(&mut T, Symbol) -> Result<(), SyntaxError>,
G: FnOnce(T) -> Result<U, SyntaxError>,
{
match self.peek()? {
Some(Token::Symbol(ch)) => {
if !ch.is_word_char() {
return self.err(SyntaxError::Unexpected(ch));
}
}
Some(Token::Newline) => {
return self.err(SyntaxError::UnexpectedNewline)
}
None => return self.err(SyntaxError::UnexpectedEof),
};
while let Some(ch) = self.cond_read_symbol(Symbol::is_word_char)? {
if let Err(err) = symbolop(&mut target, ch) {
return self.err_cur(err);
}
}
let res = match finalop(target) {
Ok(res) => res,
Err(err) => return self.err(err),
};
self.skip_delimiter()?;
Ok(res)
}
/// Scans a word with Unicode text into a `String`.
///
/// The method scans a word that consists of characters and puts these
/// into a `String`. Once the word ends, the caller is given a chance
/// to convert the value into something else via the closure `finalop`.
/// This closure can fail, resulting in an error and back-tracking to
/// the beginning of the phrase.
pub fn scan_string_word<U, G>(
&mut self,
finalop: G,
) -> Result<U, ScanError>
where
G: FnOnce(String) -> Result<U, SyntaxError>,
{
self.scan_word(
String::new(),
|res, ch| {
let ch = match ch {
Symbol::Char(ch) | Symbol::SimpleEscape(ch) => ch,
Symbol::DecimalEscape(ch) => ch as char,
};
res.push(ch);
Ok(())
},
finalop,
)
}
/// Scans a quoted word.
///
/// A quoted word starts with a double quote `"`, followed by all sorts
/// of characters or escape sequences until the next (unescaped) double
/// quote. It may contain line feeds. Like a regular word, a quoted word
/// is followed by a non-empty space sequence unless it is directly
/// followed by a [newline](#method.scan_newline). This space is not
/// part of the content but quietly skipped over.
///
/// The method starts out with a `target` value and two closures. The
/// first closure, `symbolop`, is being fed symbols of the word one by one
/// and should feed them into the target. Once the word ended, the
/// second closure is called to convert the target into the final result.
/// Both can error out at any time stopping processing and leading the
/// scanner to revert to the beginning of the token.
pub fn scan_quoted<T, U, F, G>(
&mut self,
mut target: T,
mut symbolop: F,
finalop: G,
) -> Result<U, ScanError>
where
F: FnMut(&mut T, Symbol) -> Result<(), SyntaxError>,
G: FnOnce(T) -> Result<U, SyntaxError>,
{
match self.read()? {
Some(Token::Symbol(Symbol::Char('"'))) => {}
Some(Token::Symbol(ch)) => {
return self.err(SyntaxError::Unexpected(ch))
}
Some(Token::Newline) => {
return self.err(SyntaxError::UnexpectedNewline)
}
None => return self.err(SyntaxError::UnexpectedEof),
}
loop {
match self.read()? {
Some(Token::Symbol(Symbol::Char('"'))) => break,
Some(Token::Symbol(ch)) => {
if let Err(err) = symbolop(&mut target, ch) {
return self.err(err);
}
}
Some(Token::Newline) => {
return self.err(SyntaxError::UnexpectedNewline)
}
None => return self.err(SyntaxError::UnexpectedEof),
}
}
let res = match finalop(target) {
Ok(res) => res,
Err(err) => return self.err(err),
};
self.skip_delimiter()?;
Ok(res)
}
/// Scans a phrase: a normal word or a quoted word.
///
/// This method behaves like [scan_quoted()](#method.scan_quoted) if
/// the next character is a double quote or like
/// [scan_word()](#method.scan_word) otherwise.
pub fn scan_phrase<T, U, F, G>(
&mut self,
target: T,
symbolop: F,
finalop: G,
) -> Result<U, ScanError>
where
F: FnMut(&mut T, Symbol) -> Result<(), SyntaxError>,
G: FnOnce(T) -> Result<U, SyntaxError>,
{
if let Some(Token::Symbol(Symbol::Char('"'))) = self.peek()? {
self.scan_quoted(target, symbolop, finalop)
} else {
self.scan_word(target, symbolop, finalop)
}
}
/// Scans a phrase with byte content into a `Bytes` value.
///
/// The method scans a phrase that consists of byte only and puts these
/// bytes into a `Bytes` value. Once the phrase ends, the caller is given
/// a chance to convert the value into something else via the closure
/// `finalop`. This closure can fail, resulting in an error and
/// back-tracking to the beginning of the phrase.
pub fn scan_byte_phrase<U, G>(
&mut self,
finalop: G,
) -> Result<U, ScanError>
where
G: FnOnce(Bytes) -> Result<U, SyntaxError>,
{
self.scan_phrase(
BytesMut::new(),
|buf, symbol| {
symbol
.into_octet()
.map(|ch| {
if buf.remaining_mut() == 0 {
buf.reserve(1);
}
buf.put_u8(ch)
})
.map_err(Into::into)
},
|buf| finalop(buf.freeze()),
)
}
/// Scans a phrase with Unicode text into a `String`.
///
/// The method scans a phrase that consists of characters and puts these
/// into a `String`. Once the phrase ends, the caller is given
/// a chance to convert the value into something else via the closure
/// `finalop`. This closure can fail, resulting in an error and
/// back-tracking to the beginning of the phrase.
pub fn scan_string_phrase<U, G>(
&mut self,
finalop: G,
) -> Result<U, ScanError>
where
G: FnOnce(String) -> Result<U, SyntaxError>,
{
self.scan_phrase(
String::new(),
|res, ch| {
let ch = match ch {
Symbol::Char(ch) | Symbol::SimpleEscape(ch) => ch,
Symbol::DecimalEscape(ch) => ch as char,
};
res.push(ch);
Ok(())
},
finalop,
)
}
/// Scans over a mandatory newline.
///
/// A newline is either an optional comment followed by a newline sequence
/// or the end of file. The latter is so that a file lacking a line feed
/// after its last line is still parsed successfully.
pub fn scan_newline(&mut self) -> Result<(), ScanError> {
match self.read()? {
Some(Token::Symbol(Symbol::Char(';'))) => {
while let Some(ch) = self.read()? {
if ch.is_newline() {
break;
}
}
}
Some(Token::Newline) | None => {}
_ => return self.err(SyntaxError::ExpectedNewline),
}
self.ok();
Ok(())
}
/// Scans over a mandatory sequence of space.
///
/// There are two flavors of space. The simple form is any sequence
/// of a space character `' '` or a horizontal tab '`\t'`. However,
/// a parenthesis can be used to turn [newlines](#method.scan_newline)
/// into normal space. This method recognises parentheses and acts
/// accordingly.
pub fn scan_space(&mut self) -> Result<(), ScanError> {
if self.skip_space()? {
self.ok();
Ok(())
} else {
self.err(SyntaxError::ExpectedSpace)
}
}
/// Scans over an optional sequence of space.
pub fn scan_opt_space(&mut self) -> Result<(), ScanError> {
self.skip_space()?;
Ok(())
}
/// Skips over an entry.
///
/// Keeps reading until it successfully scans a newline. The method
/// tries to be smart about that and considers parentheses, quotes, and
/// escapes but also tries its best to not fail.
pub fn skip_entry(&mut self) -> Result<(), ScanError> {
let mut quote = false;
loop {
match self.read()? {
None => break,
Some(Token::Newline) => {
if !quote && !self.paren {
break;
}
}
Some(Token::Symbol(Symbol::Char('"'))) => quote = !quote,
Some(Token::Symbol(Symbol::Char('('))) => {
if !quote {
if self.paren {
return self.err(SyntaxError::NestedParentheses);
}
self.paren = true
}
}
Some(Token::Symbol(Symbol::Char(')'))) => {
if !quote {
if !self.paren {
return self
.err(SyntaxError::Unexpected(')'.into()));
}
self.paren = false
}
}
_ => {}
}
}
self.ok();
Ok(())
}
/// Skips over the word with the content `literal`.
///
/// The content indeed needs to be literally the literal. Escapes are
/// not translated before comparison and case has to be as is.
pub fn skip_literal(&mut self, literal: &str) -> Result<(), ScanError> {
self.scan_word(
literal,
|left, symbol| {
let first = match left.chars().next() {
Some(ch) => ch,
None => {
return Err(SyntaxError::Expected(literal.into()))
}
};
match symbol {
Symbol::Char(ch) if ch == first => {
*left = &left[ch.len_utf8()..];
Ok(())
}
_ => Err(SyntaxError::Expected(literal.into())),
}
},
|left| {
if left.is_empty() {
Ok(())
} else {
Err(SyntaxError::Expected(literal.into()))
}
},
)
}
}
/// # Complex Scanning
///
#[cfg(feature = "bytes")]
impl<C: CharSource> Scanner<C> {
/// Scans a word containing a sequence of pairs of hex digits.
///
/// The word is returned as a `Bytes` value with each byte representing
/// the decoded value of one hex digit pair.
pub fn scan_hex_word<U, G>(&mut self, finalop: G) -> Result<U, ScanError>
where
G: FnOnce(Bytes) -> Result<U, SyntaxError>,
{
self.scan_word(
(BytesMut::new(), None), // result and optional first char.
|&mut (ref mut res, ref mut first), symbol| {
hex_symbolop(res, first, symbol)
},
|(res, first)| {
if let Some(ch) = first {
Err(SyntaxError::Unexpected(Symbol::Char(
::std::char::from_digit(ch, 16).unwrap(),
)))
} else {
finalop(res.freeze())
}
},
)
}
#[allow(clippy::question_mark)]
pub fn scan_hex_words<U, G>(&mut self, finalop: G) -> Result<U, ScanError>
where
G: FnOnce(Bytes) -> Result<U, SyntaxError>,
{
let start_pos = self.pos();
let mut buf = BytesMut::new();
let mut first = true;
loop {
let res = self.scan_word(
(&mut buf, None),
|&mut (ref mut buf, ref mut first), symbol| {
hex_symbolop(buf, first, symbol)
},
|(_, first)| {
if let Some(ch) = first {
Err(SyntaxError::Unexpected(Symbol::Char(
::std::char::from_digit(ch, 16).unwrap(),
)))
} else {
Ok(())
}
},
);
if first {
if let Err(err) = res {
return Err(err);
}
first = false;
} else if res.is_err() {
break;
}
}
finalop(buf.freeze()).map_err(|err| (err, start_pos).into())
}
/// Scans a phrase containing base32hex encoded data.
///
/// In particular, this decodes the “base32hex” decoding definied in
/// RFC 4648 without padding.
pub fn scan_base32hex_phrase<U, G>(
&mut self,
finalop: G,
) -> Result<U, ScanError>
where
G: FnOnce(Bytes) -> Result<U, SyntaxError>,
{
self.scan_phrase(
base32::Decoder::<BytesMut>::new_hex(),
|decoder, symbol| {
decoder
.push(symbol.into_char()?)
.map_err(SyntaxError::content)
},
|decoder| {
finalop(decoder.finalize().map_err(SyntaxError::content)?)
},
)
}
/// Scans a sequence of phrases containing base64 encoded data.
#[allow(clippy::question_mark)]
pub fn scan_base64_phrases<U, G>(
&mut self,
finalop: G,
) -> Result<U, ScanError>
where
G: FnOnce(Bytes) -> Result<U, SyntaxError>,
{
let start_pos = self.pos();
let mut decoder = base64::Decoder::<BytesMut>::new();
let mut first = true;
loop {
let res = self.scan_phrase(
&mut decoder,
|decoder, symbol| {
decoder
.push(symbol.into_char()?)
.map_err(SyntaxError::content)
},
Ok,
);
if first {
if let Err(err) = res {
return Err(err);
}
first = false;
} else if res.is_err() {
break;
}
}
let bytes = decoder
.finalize()
.map_err(|err| (SyntaxError::content(err), self.pos()))?;
finalop(bytes).map_err(|err| (err, start_pos).into())
}
}
#[cfg(feature = "bytes")]
fn hex_symbolop(
buf: &mut BytesMut,
first: &mut Option<u32>,
symbol: Symbol,
) -> Result<(), SyntaxError> {
let ch = match symbol {
Symbol::Char(ch) => match ch.to_digit(16) {
Some(ch) => ch,
_ => return Err(SyntaxError::Unexpected(symbol)),
},
_ => return Err(SyntaxError::Unexpected(symbol)),
};
if let Some(ch1) = first.take() {
if buf.remaining_mut() == 0 {
buf.reserve(1)
}
buf.put_u8((ch1 as u8) << 4 | (ch as u8));
} else {
*first = Some(ch)
}
Ok(())
}
/// # Fundamental Reading, Processing, and Back-tracking
///
#[cfg(feature = "bytes")]
impl<C: CharSource> Scanner<C> {
/// Reads a char from the source.
///
/// This function is here to for error conversion only and updating the
/// human-friendly position.
fn chars_next(&mut self) -> Result<Option<char>, ScanError> {
self.chars.next().map_err(|err| {
let mut pos = self.cur_pos;
for ch in &self.buf {
pos.update(*ch)
}
ScanError::Source(err, pos)
})
}
/// Tries to read at least one additional character into the buffer.
///
/// Returns whether that succeeded.
fn source_token(&mut self) -> Result<bool, ScanError> {
let ch = match self.chars_next()? {
Some(ch) => ch,
None => return Ok(false),
};
if ch == '\\' {
self.source_escape()
} else {
self.source_normal(ch)
}
}
/// Tries to read and return the content of an escape sequence.
fn source_escape(&mut self) -> Result<bool, ScanError> {
let ch = match self.chars_next()? {
Some(ch) if ch.is_ascii_digit() => {
let ch = ch.to_digit(10).unwrap() * 100;
let ch2 = match self.chars_next()? {
Some(ch) => match ch.to_digit(10) {
Some(ch) => ch * 10,
None => {
return self.err_cur(SyntaxError::IllegalEscape)
}
},
None => return self.err_cur(SyntaxError::UnexpectedEof),
};
let ch3 = match self.chars_next()? {
Some(ch) => match ch.to_digit(10) {
Some(ch) => ch,
None => {
return self.err_cur(SyntaxError::IllegalEscape)
}
},
None => return self.err_cur(SyntaxError::UnexpectedEof),
};
let res = ch + ch2 + ch3;
if res > 255 {
return self.err_cur(SyntaxError::IllegalEscape);
} else {
Symbol::DecimalEscape(res as u8)
}
}
Some(ch) => Symbol::SimpleEscape(ch),
None => return self.err_cur(SyntaxError::UnexpectedEof),
};
self.buf.push(Token::Symbol(ch));
Ok(true)
}
/// Tries to source a normal character.
fn source_normal(&mut self, ch: char) -> Result<bool, ScanError> {
match self.newline {
NewlineMode::Single(sep) => {
if ch == sep {
self.buf.push(Token::Newline)
} else {
self.buf.push(Token::Symbol(Symbol::Char(ch)))
}
Ok(true)
}
NewlineMode::Double(first, second) => {
if ch != first {
self.buf.push(Token::Symbol(Symbol::Char(ch)));
Ok(true)
} else {
match self.chars_next()? {
Some(ch) if ch == second => {
self.buf.push(Token::Newline);
Ok(true)
}
Some(ch) => {
self.buf.push(Token::Symbol(Symbol::Char(first)));
self.buf.push(Token::Symbol(Symbol::Char(ch)));
Ok(true)
}
None => {
// Half a newline is still EOF.
Ok(false)
}
}
}
}
NewlineMode::Unknown => {
if ch != '\r' && ch != '\n' {
self.buf.push(Token::Symbol(Symbol::Char(ch)));
Ok(true)
} else if let Some(second) = self.chars_next()? {
match (ch, second) {
('\r', '\n') | ('\n', '\r') => {
self.newline = NewlineMode::Double(ch, second);
self.buf.push(Token::Newline);
}
('\r', '\r') | ('\n', '\n') => {
self.newline = NewlineMode::Single(ch);
self.buf.push(Token::Newline);
self.buf.push(Token::Newline);
}
('\r', _) | ('\n', _) => {
self.newline = NewlineMode::Single(ch);
self.buf.push(Token::Newline);
self.buf
.push(Token::Symbol(Symbol::Char(second)));
}
_ => {
self.buf.push(Token::Symbol(Symbol::Char(ch)));
self.buf
.push(Token::Symbol(Symbol::Char(second)));
}
}
Ok(true)
} else {
if ch == '\r' || ch == '\n' {
self.buf.push(Token::Newline);
} else {
self.buf.push(Token::Symbol(Symbol::Char(ch)))
}
Ok(true)
}
}
}
}
/// Tries to peek at the next symbol.
///
/// On success, returns the symbol. It the end of the
/// underlying source is reached, returns `Ok(None)`. If reading on the
/// underlying source results in an error, returns that.
fn peek(&mut self) -> Result<Option<Token>, ScanError> {
if self.buf.len() == self.cur && !self.source_token()? {
return Ok(None);
}
Ok(Some(self.buf[self.cur]))
}
/// Tries to read a symbol.
///
/// On success, returns the `Ok(Some(_))` character. It the end of the
/// underlying source is reached, returns `Ok(None)`. If reading on the
/// underlying source results in an error, returns that.
fn read(&mut self) -> Result<Option<Token>, ScanError> {
self.peek().map(|res| match res {
Some(ch) => {
self.cur += 1;
self.cur_pos.update(ch);
Some(ch)
}
None => None,
})
}
/// Skip the first token.
///
/// Only ever call this if you called `peek` before and it did return
/// `Some(ch)`.
///
/// This is an optimization.
fn skip(&mut self, ch: Token) {
self.cur += 1;
self.cur_pos.update(ch)
}
/// Progresses the scanner to the current position and returns `t`.
fn ok(&mut self) {
if self.buf.len() == self.cur {
self.buf.clear();
self.start = 0;
self.cur = 0;
} else {
self.start = self.cur;
}
self.start_pos = self.cur_pos;
}
/// Backtracks to the last token start and reports an error there.
///
/// Returns a syntax error with the given error value and the position
/// of the token start.
///
/// The method is generic over whatever type `T` so it can be used to
/// create whatever particular result is needed.
fn err<T>(&mut self, err: SyntaxError) -> Result<T, ScanError> {
let pos = self.start_pos;
self.err_at(err, pos)
}
fn err_cur<T>(&mut self, err: SyntaxError) -> Result<T, ScanError> {
let pos = self.cur_pos;
self.err_at(err, pos)
}
/// Reports an error at current position and then backtracks.
fn err_at<T>(
&mut self,
err: SyntaxError,
pos: Pos,
) -> Result<T, ScanError> {
self.cur = self.start;
self.cur_pos = self.start_pos;
Err(ScanError::Syntax(err, pos))
}
}
/// # More Complex Internal Reading
///
#[cfg(feature = "bytes")]
impl<C: CharSource> Scanner<C> {
/// Reads a symbol if it is accepted by a closure.
///
/// The symbol is passed to the closure which should return `true` if
/// it accepts it in which case the method returns `Ok(Some(_))`. If
/// the closure returns `false` or the end of file is reached, `Ok(None)`
/// is returned.
///
/// The method does not progress or backtrack.
fn cond_read<F>(&mut self, f: F) -> Result<Option<Token>, ScanError>
where
F: FnOnce(Token) -> bool,
{
match self.peek()? {
Some(ch) if f(ch) => self.read(),
_ => Ok(None),
}
}
fn cond_read_symbol<F>(
&mut self,
f: F,
) -> Result<Option<Symbol>, ScanError>
where
F: FnOnce(Symbol) -> bool,
{
match self.peek()? {
Some(Token::Symbol(ch)) if f(ch) => {
self.skip(Token::Symbol(ch));
Ok(Some(ch))
}
_ => Ok(None),
}
}
/// Skips over delimiting space.
///
/// A delimiter is a non-empty sequence of space (which means that
/// something like `"foo(bar"` qualifies as the two words `"foo"` and
/// `"bar".) or if the following byte is the beginning of a newline or
/// if the scanner has reached end-of-file.
///
/// Progresses the scanner on success, otherwise backtracks with an
/// ‘unexpected space’ error.
fn skip_delimiter(&mut self) -> Result<(), ScanError> {
if !self.skip_space()? {
match self.peek()? {
Some(ch) if ch.is_newline_ahead() => {}
None => {}
_ => return self.err(SyntaxError::ExpectedSpace),
}
}
self.ok();
Ok(())
}
/// Skips over space.
///
/// Normally, space is ordinary white space (`' '` and `'\t'`).
/// However, an opening parenthesis can be used to make newlines appear
/// as space, too. A closing parenthesis resets this behaviour.
///
/// This method cleverly hides all of this and simply walks over whatever
/// is space. It returns whether there was at least one character of
/// space. It does not progress the scanner but backtracks on error.
fn skip_space(&mut self) -> Result<bool, ScanError> {
let mut res = false;
loop {
if self.paren {
match self.cond_read(Token::is_paren_space)? {
None => break,
Some(Token::Symbol(Symbol::Char('('))) => {
let pos = self.cur_pos.prev();
return self
.err_at(SyntaxError::NestedParentheses, pos);
}
Some(Token::Symbol(Symbol::Char(')'))) => {
self.paren = false;
}
Some(Token::Symbol(Symbol::Char(';'))) => {
while let Some(ch) = self.read()? {
if ch.is_newline() {
break;
}
}
}
_ => {}
}
} else {
match self.cond_read(Token::is_non_paren_space)? {
None => break,
Some(Token::Symbol(Symbol::Char('('))) => {
self.paren = true;
}
Some(Token::Symbol(Symbol::Char(')'))) => {
let pos = self.cur_pos.prev();
return self.err_at(
SyntaxError::Unexpected(')'.into()),
pos,
);
}
_ => {}
}
}
res = true;
}
Ok(res)
}
}
//------------ Scan ----------------------------------------------------------
/// A type that can by scanned from a master file.
#[cfg(feature = "bytes")]
pub trait Scan: Sized {
/// Scans a value from a master file.
fn scan<C: CharSource>(
scanner: &mut Scanner<C>,
) -> Result<Self, ScanError>;
}
#[cfg(feature = "bytes")]
impl Scan for u32 {
fn scan<C: CharSource>(
scanner: &mut Scanner<C>,
) -> Result<Self, ScanError> {
scanner.scan_phrase(
0u32,
|res, symbol| {
let ch = match symbol {
Symbol::Char(ch) => {
if let Some(value) = ch.to_digit(10) {
value
} else {
return Err(SyntaxError::Unexpected(symbol));
}
}
_ => return Err(SyntaxError::Unexpected(symbol)),
};
*res = match res.checked_mul(10) {
Some(res) => res,
None => return Err(SyntaxError::IllegalInteger),
};
*res = match res.checked_add(ch) {
Some(res) => res,
None => return Err(SyntaxError::IllegalInteger),
};
Ok(())
},
Ok,
)
}
}
#[cfg(feature = "bytes")]
impl Scan for u16 {
fn scan<C: CharSource>(
scanner: &mut Scanner<C>,
) -> Result<Self, ScanError> {
scanner.scan_phrase(
0u16,
|res, symbol| {
let ch = match symbol {
Symbol::Char(ch) => {
if let Some(value) = ch.to_digit(10) {
value as u16
} else {
return Err(SyntaxError::Unexpected(symbol));
}
}
_ => return Err(SyntaxError::Unexpected(symbol)),
};
*res = match res.checked_mul(10) {
Some(res) => res,
None => return Err(SyntaxError::IllegalInteger),
};
*res = match res.checked_add(ch) {
Some(res) => res,
None => return Err(SyntaxError::IllegalInteger),
};
Ok(())
},
Ok,
)
}
}
#[cfg(feature = "bytes")]
impl Scan for u8 {
fn scan<C: CharSource>(
scanner: &mut Scanner<C>,
) -> Result<Self, ScanError> {
scanner.scan_phrase(
0u8,
|res, symbol| {
let ch = match symbol {
Symbol::Char(ch) => {
if let Some(value) = ch.to_digit(10) {
value as u8
} else {
return Err(SyntaxError::Unexpected(symbol));
}
}
_ => return Err(SyntaxError::Unexpected(symbol)),
};
*res = match res.checked_mul(10) {
Some(res) => res,
None => return Err(SyntaxError::IllegalInteger),
};
*res = match res.checked_add(ch) {
Some(res) => res,
None => return Err(SyntaxError::IllegalInteger),
};
Ok(())
},
Ok,
)
}
}
//------------ Token ---------------------------------------------------------
/// A single symbol parsed from a master file.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Token {
/// A regular symbol.
Symbol(Symbol),
/// A new line.
///
/// This needs special treatment because of the varying encoding of
/// newlines on different systems.
Newline,
}
#[allow(dead_code)]
impl Token {
/// Checks for space-worthy character outside a parenthesized group.
///
/// These are horizontal white space plus opening and closing parentheses
/// which need special treatment.
fn is_non_paren_space(self) -> bool {
match self {
Token::Symbol(Symbol::Char(ch)) => {
ch == ' ' || ch == '\t' || ch == '(' || ch == ')'
}
_ => false,
}
}
/// Checks for space-worthy character inside a parenthesized group.
///
/// These are all from `is_non_paren_space()` plus a semicolon and line
/// break characters.
fn is_paren_space(self) -> bool {
match self {
Token::Symbol(Symbol::Char(ch)) => {
ch == ' ' || ch == '\t' || ch == '(' || ch == ')' || ch == ';'
}
Token::Newline => true,
_ => false,
}
}
/// Returns whether the token is a newline.
fn is_newline(self) -> bool {
matches!(self, Token::Newline)
}
/// Returns whether the token starts a newline sequence.
///
/// This happens if the token is either a newline itself or an unescaped
/// semicolon which starts a comment until line’s end.
fn is_newline_ahead(self) -> bool {
matches!(self, Token::Symbol(Symbol::Char(';')) | Token::Newline)
}
}
//------------ NewlineMode ---------------------------------------------------
/// The newline mode used by a file.
///
/// Files can use different characters or character combinations to signal a
/// line break. Since line breaks are significant in master files, we need to
/// use the right mode.
#[allow(dead_code)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum NewlineMode {
/// Each occurence of the content is a newline.
Single(char),
/// Each combination of the two chars is a newline.
Double(char, char),
/// We don’t know yet.
Unknown,
}
//------------ Pos -----------------------------------------------------------
/// The human-friendly position in a reader.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct Pos {
line: usize,
col: usize,
}
impl Pos {
pub fn new() -> Pos {
Pos { line: 1, col: 1 }
}
pub fn line(&self) -> usize {
self.line
}
pub fn col(&self) -> usize {
self.col
}
pub fn update(&mut self, ch: Token) {
match ch {
Token::Symbol(Symbol::Char(_)) => self.col += 1,
Token::Symbol(Symbol::SimpleEscape(_)) => self.col += 2,
Token::Symbol(Symbol::DecimalEscape(_)) => self.col += 4,
Token::Newline => {
self.line += 1;
self.col = 1
}
}
}
pub fn prev(&self) -> Pos {
Pos {
line: self.line,
col: if self.col <= 1 { 1 } else { self.col - 1 },
}
}
}
impl From<(usize, usize)> for Pos {
fn from(src: (usize, usize)) -> Pos {
Pos {
line: src.0,
col: src.1,
}
}
}
impl PartialEq<(usize, usize)> for Pos {
fn eq(&self, other: &(usize, usize)) -> bool {
self.line == other.0 && self.col == other.1
}
}
impl fmt::Display for Pos {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}:{}", self.line, self.col)
}
}
//============ Error Types ===================================================
//------------ SyntaxError ---------------------------------------------------
/// A syntax error happened while scanning master data.
#[derive(Debug)]
#[non_exhaustive]
pub enum SyntaxError {
Expected(String),
ExpectedNewline,
ExpectedSpace,
IllegalEscape,
IllegalInteger, // TODO Add kind
IllegalAddr(AddrParseError),
IllegalName(name::FromStrError),
LongCharStr,
UnevenHexString,
LongGenericData,
NestedParentheses,
NoDefaultTtl,
NoLastClass,
NoLastOwner,
NoOrigin,
RelativeName,
Unexpected(Symbol),
UnexpectedNewline,
UnexpectedEof,
UnknownMnemonic,
///
/// Used when converting some other content fails.
Content(Box<dyn error::Error>),
}
impl SyntaxError {
pub fn content<E: error::Error + 'static>(err: E) -> Self {
SyntaxError::Content(Box::new(err))
}
}
//--- From
impl From<BadSymbol> for SyntaxError {
fn from(err: BadSymbol) -> SyntaxError {
SyntaxError::Unexpected(err.0)
}
}
impl From<AddrParseError> for SyntaxError {
fn from(err: AddrParseError) -> SyntaxError {
SyntaxError::IllegalAddr(err)
}
}
impl From<name::FromStrError> for SyntaxError {
fn from(err: name::FromStrError) -> SyntaxError {
SyntaxError::IllegalName(err)
}
}
impl From<name::PushNameError> for SyntaxError {
fn from(err: name::PushNameError) -> SyntaxError {
SyntaxError::from(name::FromStrError::from(err))
}
}
//--- Display and Error
impl fmt::Display for SyntaxError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
SyntaxError::Expected(ref s) => write!(f, "expected '{}'", s),
SyntaxError::ExpectedNewline => {
f.write_str("expected a new line")
}
SyntaxError::ExpectedSpace => f.write_str("expected white space"),
SyntaxError::IllegalEscape => {
f.write_str("invalid escape sequence")
}
SyntaxError::IllegalInteger => f.write_str("illegal integer"),
SyntaxError::IllegalAddr(ref err) => {
write!(f, "illegal address: {}", err)
}
SyntaxError::IllegalName(ref err) => {
write!(f, "illegal domain name: {}", err)
}
SyntaxError::LongCharStr => {
f.write_str("character string too long")
}
SyntaxError::UnevenHexString => {
f.write_str("hex string with an odd number of characters")
}
SyntaxError::LongGenericData => {
f.write_str("more data given than in the length byte")
}
SyntaxError::NestedParentheses => {
f.write_str("nested parentheses")
}
SyntaxError::NoDefaultTtl => {
f.write_str("omitted TTL but no default TTL given")
}
SyntaxError::NoLastClass => {
f.write_str("omitted class but no previous class given")
}
SyntaxError::NoLastOwner => {
f.write_str("omitted owner but no previous owner given")
}
SyntaxError::NoOrigin => {
f.write_str("owner @ without preceding $ORIGIN")
}
SyntaxError::RelativeName => f.write_str("relative domain name"),
SyntaxError::Unexpected(sym) => write!(f, "unexpected '{}'", sym),
SyntaxError::UnexpectedNewline => {
f.write_str("unexpected newline")
}
SyntaxError::UnexpectedEof => {
f.write_str("unexpected end of file")
}
SyntaxError::UnknownMnemonic => {
f.write_str("unexpected mnemomic")
}
SyntaxError::Content(ref content) => content.fmt(f),
}
}
}
impl error::Error for SyntaxError {}
//------------ ScanError -----------------------------------------------------
/// An error happened while scanning master data.
#[derive(Debug)]
pub enum ScanError {
Source(io::Error, Pos),
Syntax(SyntaxError, Pos),
}
//--- From
impl From<(io::Error, Pos)> for ScanError {
fn from(err: (io::Error, Pos)) -> ScanError {
ScanError::Source(err.0, err.1)
}
}
impl From<(SyntaxError, Pos)> for ScanError {
fn from(err: (SyntaxError, Pos)) -> ScanError {
ScanError::Syntax(err.0, err.1)
}
}
//--- Display and Error
impl fmt::Display for ScanError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
ScanError::Source(ref err, pos) => write!(f, "{}: {}", pos, err),
ScanError::Syntax(ref err, pos) => write!(f, "{}: {}", pos, err),
}
}
}
impl error::Error for ScanError {}
//============ Test ==========================================================
#[cfg(test)]
mod test {
use super::*;
#[test]
fn scan_word() {
let mut scanner = Scanner::new("one two three\nfour");
assert_eq!(scanner.scan_string_word(Ok).unwrap(), "one");
}
}