1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318
use std::str; use ::bits::CharStrBuf; use ::bits::{DNameBuf, DNameSlice}; use ::bits::name::{DNameBuilder, DNameBuildInto}; use super::error::{Pos, ScanResult, SyntaxError, SyntaxResult}; //------------ Scanner ------------------------------------------------------- /// A trait for a scanner of master format tokens. /// /// The master format is using a small number of different token types. This /// trait provides a way to access a sequence of such tokens. There is a /// method prefix by `scan_` for each type of token that tries to scan a /// token of the given type. If it succeeds, it returns the token and /// progresses to the end of the token. If it fails, it returns an error and /// reverts to the position before the scanning attempt unless an IO error /// occured in which case the scanner becomes ‘broken.’ /// /// The methods that provide access to the token’s content do so through /// closures to let the user decide if and how copies ought to be made. Note /// that since the scanner may throw away the content at any time after the /// closure returns, you cannot keep the slice references passed in. This is /// not on purpose, not a mistake with the lifetime arguments. pub trait Scanner { /// # Fundamental Methods /// /// Returns whether the scanner has reached the end of data. /// /// This will return `false` if reading results in an IO error. fn is_eof(&mut self) -> bool; /// Returns the current position of the scanner. fn pos(&self) -> Pos; /// Scans a word token. /// /// A word is a sequence of non-special characters and escape sequences /// followed by a non-empty sequence of space unless it is followed /// directly by a [newline](#tymethod.scan_newline). If successful, the /// method will position at the end of the space sequence if it is /// required. That is, you can scan for two subsequent word tokens /// without worrying about the space between them. /// /// A reference to the content of the actual word (ie., without any /// trailing space) is passed to the provided closure. This is the raw /// content without any escape sequences translated. If the closure likes /// the content, it can return something which will then become the /// return value of the entire method. Otherwise, it returns a syntax /// error. In this case, the whole method will fails returning the syntax /// error and the position of the start of the token. fn scan_word<T, F>(&mut self, f: F) -> ScanResult<T> where F: FnOnce(&[u8]) -> SyntaxResult<T>; /// Scans a word, processing each character of its content separatedly. /// /// This method is similar to [scan_word()](#tymethod.scan_word) but the /// closure is called for each character of the content. Escape sequences /// are translated into the character they stand for. For each character, /// the closure receives the character value and a boolean /// indicating whether the character was in fact translated from an /// escape sequence. Ie., the content `b"f"` will be translated into one /// single closure call with `f(b'f', false)`, whereas the content /// `b"\102"` will also be just one call but with `f(b'f', true)`. /// /// If the closure returns `Ok(())`, the method proceeds to the next /// content character or, if there are no more characters, itself returns /// `Ok(())`. If the closure returns an error, the method returns to the /// start of the token and returns the error with that position. fn scan_word_bytes<F>(&mut self, mut f: F) -> ScanResult<()> where F: FnMut(u8, bool) -> SyntaxResult<()> { self.scan_word_into((), |_, b, escape| f(b, escape), |_| Ok(())) } fn scan_word_into<T, U, F, G>(&mut self, target: T, f: F, g: G) -> ScanResult<U> where F: FnMut(&mut T, u8, bool) -> SyntaxResult<()>, G: FnOnce(T) -> SyntaxResult<U>; /// Scans a quoted word. /// /// A quoted word starts with a double quote `"`, followed by all sorts /// of characters or escape sequences until the next (unescaped) double /// quote. It may contain line feeds. Like a regular word, a quoted word /// is followed by a non-empty space sequence unless it is directly /// followed by a [newline](#tymethod.scan_newline). This space is not /// part of the content but quietly skipped over. /// /// The reference to the raw content of the quoted word is given to the /// closure `f` which needs to decide of it fulfills its own /// requirements. If it does, it can translate it into a return value /// which is also returned by the method. Otherwise, it returns a syntax /// error which is reported by the method with the position of the /// first double quote. fn scan_quoted<T, F>(&mut self, f: F) -> ScanResult<T> where F: FnOnce(&[u8]) -> SyntaxResult<T>; /// Scans a quoted word, processing the content characters separatedly. /// /// This method is similar to [scan_quoted()](#tymethod.scan_quoted) but /// the closure is called for each character of the content. Escape /// sequences are translated into the character they stand for. For each /// character, the closure receives the character value and a boolean /// indicating whether the character was in fact translated from an /// escape sequence. Ie., the content `b"f"` will be translated into one /// single closure call with `f(b'f', false)`, whereas the content /// `b"\102"` will also be just one call but with `f(b'f', true)`. /// /// If the closure returns `Ok(())`, the method proceeds to the next /// content character or, if there are no more characters, itself returns /// `Ok(())`. If the closure returns an error, the method returns to the /// start of the token and returns the error with that position. fn scan_quoted_bytes<F>(&mut self, f: F) -> ScanResult<()> where F: FnMut(u8, bool) -> SyntaxResult<()>; /// Scans phrase: a normal or quoted word. /// /// This method behaves like [scan_quoted()](#tymethod.scan_quoted) if /// the next character is a double quote or like /// [scan_word()](#tymethod.scan_word) otherwise. fn scan_phrase<T, F>(&mut self, f: F) -> ScanResult<T> where F: FnOnce(&[u8]) -> SyntaxResult<T>; /// Scans a phrase and converts it into a string slice. /// /// This method is similar to [scan_phrase()](#tymethod.scan_phrase) /// but passes a string slice to the closure instead of a bytes slice. /// There are no allocations and the method syntax errors out if the /// content contains non-ASCII characters. fn scan_str_phrase<T, F>(&mut self, f: F) -> ScanResult<T> where F: FnOnce(&str) -> SyntaxResult<T> { self.scan_phrase(|slice| { f(try!(str::from_utf8(slice))) }) } /// Scans a phrase, processing the content characters separatedly. /// /// This method behaves like /// [scan_quoted_bytes()](#tymethod.scan_quoted_bytes) if /// the next character is a double quote or like /// [scan_word_bytes()](#tymethod.scan_word_bytes) otherwise. fn scan_phrase_bytes<F>(&mut self, f: F) -> ScanResult<()> where F: FnMut(u8, bool) -> SyntaxResult<()>; /// Scans a phrase and returns a copy of it. /// /// The copy will have all escape sequences translated. fn scan_phrase_copy(&mut self) -> ScanResult<Vec<u8>> { let mut res = Vec::new(); try!(self.scan_phrase_bytes(|ch, _| { res.push(ch); Ok(()) })); Ok(res) } /// Scans a newline. /// /// A newline is either an optional comment followed by either a CR or /// LF character or the end of file. The latter is so that a file lacking /// a line feed after its last line is still parsed successfully. fn scan_newline(&mut self) -> ScanResult<()>; /// Scans a non-empty sequence of space. /// /// There are two flavors of space. The simple form is any sequence /// of a space character `b' '` or a horizontal tab 'b`\t'`. However, /// a parenthesis can be used to turn [newlines](#tymethod.scan_newline) /// into normal space. This method recognises parentheses and acts /// accordingly. fn scan_space(&mut self) -> ScanResult<()>; /// Scans a possibly empty sequence of space. fn scan_opt_space(&mut self) -> ScanResult<()>; /// Skips over an entry. /// /// Keeps reading until it successfully scans a newline. The method /// tries to be smart about that and considers parentheses, quotes, and /// escapes but also tries its best to not fail. fn skip_entry(&mut self) -> ScanResult<()>; /// # Helper Methods /// /// Scans a phrase containing a 16 bit integer in decimal representation. fn scan_u16(&mut self) -> ScanResult<u16> { self.scan_phrase(|slice| { let slice = match str::from_utf8(slice) { Ok(slice) => slice, Err(_) => return Err(SyntaxError::IllegalInteger) }; Ok(try!(u16::from_str_radix(slice, 10))) }) } /// Scans a phrase containing a 32 bit integer in decimal representation. fn scan_u32(&mut self) -> ScanResult<u32> { self.scan_phrase(|slice| { let slice = match str::from_utf8(slice) { Ok(slice) => slice, Err(_) => return Err(SyntaxError::IllegalInteger) }; Ok(try!(u32::from_str_radix(slice, 10))) }) } /// Scans a word containing a sequence of pairs of hex digits. /// /// Each pair is translated to its byte value and passed to the /// closure `f`. fn scan_hex_word<F>(&mut self, mut f: F) -> ScanResult<()> where F: FnMut(u8) -> SyntaxResult<()> { self.scan_word(|mut slice| { while slice.len() >=2 { let (l, r) = slice.split_at(2); let res = try!(trans_hexdig(l[0])) << 4 | try!(trans_hexdig(l[1])); try!(f(res)); slice = r; } if slice.len() == 1 { Err(SyntaxError::Unexpected(slice[0])) } else { Ok(()) } }) } /// Skips over the word with the content `literal`. /// /// The content indeed needs to be literally the literal. Escapes are /// not translated before comparison and case has to be as is. fn skip_literal(&mut self, literal: &[u8]) -> ScanResult<()> { self.scan_word(|s| { if s == literal { Ok(()) } else { Err(SyntaxError::Expected(literal.into())) } }) } /// Scans a domain name and returns an owned domain name. /// /// If the name is relative, it is made absolute by appending `origin`. /// If there is no origin given, a syntax error is returned. fn scan_dname(&mut self, origin: Option<&DNameSlice>) -> ScanResult<DNameBuf> { let target = DNameBuilder::new(origin); self.scan_word_into(target, |target, b, escaped| { if b == b'.' && !escaped { target.end_label() } else { try!(target.push(b)) } Ok(()) }, |target| { Ok(try!(target.done())) }) } /// Scans a domain name into a bytes vec. /// /// The name is scanned and its wire format representation is appened /// to the end of `target`. If the scanned name is relative, it is made /// absolute by appending `origin`. If there is no origin given, a /// syntax error is returned. fn scan_dname_into(&mut self, origin: Option<&DNameSlice>, target: &mut Vec<u8>) -> ScanResult<()> { let target = DNameBuildInto::new(target, origin); try!(self.scan_word_into(target, |target, b, escaped| { if b == b'.' && !escaped { target.end_label() } else { try!(target.push(b)) } Ok(()) }, |target| { try!(target.done()); Ok(()) })); Ok(()) } /// Scans a character string and returns it as an owned value. fn scan_charstr(&mut self) -> ScanResult<CharStrBuf> { let mut res = Vec::new(); try!(self.scan_charstr_into(&mut res)); Ok(CharStrBuf::from_vec(res).unwrap()) } /// Scans a character string into a bytes vec. /// /// The string is scanned and its wire format representation is appened /// to the end of `target`. fn scan_charstr_into(&mut self, target: &mut Vec<u8>) -> ScanResult<()> { let mut len = 0; self.scan_phrase_bytes(|ch, _| { if len == 255 { Err(SyntaxError::LongCharStr) } else { target.push(ch); len += 1; Ok(()) } }) } } //------------ Helper Functions ---------------------------------------------- fn trans_hexdig(dig: u8) -> SyntaxResult<u8> { match dig { b'0' ... b'9' => Ok(dig - b'0'), b'A' ... b'F' => Ok(dig - b'A' + 10), b'a' ... b'f' => Ok(dig - b'a' + 10), _ => Err(SyntaxError::Unexpected(dig)) } }