hickory_proto/serialize/txt/zone.rs
1// Copyright 2015-2023 Benjamin Fry <benjaminfry@me.com>
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// https://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// https://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8use alloc::{
9 borrow::Cow,
10 collections::btree_map::{BTreeMap, Entry},
11 string::{String, ToString},
12 vec::Vec,
13};
14use core::{mem, str::FromStr};
15use std::{
16 fs,
17 path::{Path, PathBuf},
18};
19
20use crate::{
21 rr::{DNSClass, LowerName, Name, RData, Record, RecordSet, RecordType, RrKey},
22 serialize::txt::{
23 ParseError, ParseErrorKind, ParseResult,
24 parse_rdata::RDataParser,
25 zone_lex::{Lexer, Token},
26 },
27};
28
29/// ```text
30/// 5. ZONE FILES
31///
32/// Zone files are text files that contain RRs in text form. Since the
33/// contents of a zone can be expressed in the form of a list of RRs a
34/// Zone File is most often used to define a zone, though it can be used
35/// to list a cache's contents. Hence, this section first discusses the
36/// format of RRs in a Zone File, and then the special considerations when
37/// a Zone File is used to create a zone in some name server.
38///
39/// 5.1. Format
40///
41/// The format of these files is a sequence of entries. Entries are
42/// predominantly line-oriented, though parentheses can be used to continue
43/// a list of items across a line boundary, and text literals can contain
44/// CRLF within the text. Any combination of tabs and spaces act as a
45/// delimiter between the separate items that make up an entry. The end of
46/// any line in the Zone File can end with a comment. The comment starts
47/// with a ";" (semicolon).
48///
49/// The following entries are defined:
50///
51/// <blank>[<comment>]
52///
53/// $ORIGIN <domain-name> [<comment>]
54///
55/// $INCLUDE <file-name> [<domain-name>] [<comment>]
56///
57/// <domain-name><rr> [<comment>]
58///
59/// <blank><rr> [<comment>]
60///
61/// Blank lines, with or without comments, are allowed anywhere in the file.
62///
63/// Two control entries are defined: $ORIGIN and $INCLUDE. $ORIGIN is
64/// followed by a domain name, and resets the current origin for relative
65/// domain names to the stated name. $INCLUDE inserts the named file into
66/// the current file, and may optionally specify a domain name that sets the
67/// relative domain name origin for the included file. $INCLUDE may also
68/// have a comment. Note that a $INCLUDE entry never changes the relative
69/// origin of the parent file, regardless of changes to the relative origin
70/// made within the included file.
71///
72/// The last two forms represent RRs. If an entry for an RR begins with a
73/// blank, then the RR is assumed to be owned by the last stated owner. If
74/// an RR entry begins with a <domain-name>, then the owner name is reset.
75///
76/// <rr> contents take one of the following forms:
77///
78/// [<TTL>] [<class>] <type> <RDATA>
79///
80/// [<class>] [<TTL>] <type> <RDATA>
81///
82/// The RR begins with optional TTL and class fields, followed by a type and
83/// RDATA field appropriate to the type and class. Class and type use the
84/// standard mnemonics, TTL is a decimal integer. Omitted class and TTL
85/// values are default to the last explicitly stated values. Since type and
86/// class mnemonics are disjoint, the parse is unique. (Note that this
87/// order is different from the order used in examples and the order used in
88/// the actual RRs; the given order allows easier parsing and defaulting.)
89///
90/// <domain-name>s make up a large share of the data in the Zone File.
91/// The labels in the domain name are expressed as character strings and
92/// separated by dots. Quoting conventions allow arbitrary characters to be
93/// stored in domain names. Domain names that end in a dot are called
94/// absolute, and are taken as complete. Domain names which do not end in a
95/// dot are called relative; the actual domain name is the concatenation of
96/// the relative part with an origin specified in a $ORIGIN, $INCLUDE, or as
97/// an argument to the Zone File loading routine. A relative name is an
98/// error when no origin is available.
99///
100/// <character-string> is expressed in one or two ways: as a contiguous set
101/// of characters without interior spaces, or as a string beginning with a "
102/// and ending with a ". Inside a " delimited string any character can
103/// occur, except for a " itself, which must be quoted using \ (back slash).
104///
105/// Because these files are text files several special encodings are
106/// necessary to allow arbitrary data to be loaded. In particular:
107///
108/// of the root.
109///
110/// @ A free standing @ is used to denote the current origin.
111///
112/// \X where X is any character other than a digit (0-9), is
113/// used to quote that character so that its special meaning
114/// does not apply. For example, "\." can be used to place
115/// a dot character in a label.
116///
117/// \DDD where each D is a digit is the octet corresponding to
118/// the decimal number described by DDD. The resulting
119/// octet is assumed to be text and is not checked for
120/// special meaning.
121///
122/// ( ) Parentheses are used to group data that crosses a line
123/// boundary. In effect, line terminations are not
124/// recognized within parentheses.
125///
126/// ; Semicolon is used to start a comment; the remainder of
127/// the line is ignored.
128/// ```
129pub struct Parser<'a> {
130 lexers: Vec<(Lexer<'a>, Option<PathBuf>)>,
131 origin: Option<Name>,
132}
133
134impl<'a> Parser<'a> {
135 /// Returns a new Zone file parser
136 ///
137 /// The `path` argument's parent directory is used to resolve relative `$INCLUDE` paths.
138 /// Relative `$INCLUDE` paths will yield an error if `path` is `None`.
139 pub fn new(
140 input: impl Into<Cow<'a, str>>,
141 path: Option<PathBuf>,
142 origin: Option<Name>,
143 ) -> Self {
144 Self {
145 lexers: vec![(Lexer::new(input), path)],
146 origin,
147 }
148 }
149
150 /// Parse a file from the Lexer
151 ///
152 /// # Return
153 ///
154 /// A pair of the Zone origin name and a map of all Keys to RecordSets
155 pub fn parse(mut self) -> ParseResult<(Name, BTreeMap<RrKey, RecordSet>)> {
156 let mut cx = Context::new(self.origin);
157 let mut state = State::StartLine;
158 let mut stack = self.lexers.len();
159
160 'outer: while let Some((lexer, path)) = self.lexers.last_mut() {
161 while let Some(t) = lexer.next_token()? {
162 state = match state {
163 State::StartLine => {
164 // current_name is not reset on the next line b/c it might be needed from the previous
165 cx.rtype = None;
166
167 match t {
168 // if Dollar, then $INCLUDE or $ORIGIN
169 Token::Include => State::Include(None),
170 Token::Origin => State::Origin,
171 Token::Ttl => State::Ttl,
172
173 // if CharData, then Name then ttl_class_type
174 Token::CharData(data) => {
175 cx.current_name = Some(Name::parse(&data, cx.origin.as_ref())?);
176 State::TtlClassType
177 }
178
179 // @ is a placeholder for specifying the current origin
180 Token::At => {
181 cx.current_name.clone_from(&cx.origin); // TODO a COW or RC would reduce copies...
182 State::TtlClassType
183 }
184
185 // if blank, then nothing or ttl_class_type
186 Token::Blank => State::TtlClassType,
187 Token::EOL => State::StartLine, // probably a comment
188 _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
189 }
190 }
191 State::Ttl => match t {
192 Token::CharData(data) => {
193 cx.ttl = Some(Self::parse_time(&data)?);
194 State::StartLine
195 }
196 _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
197 },
198 State::Origin => {
199 match t {
200 Token::CharData(data) => {
201 // TODO an origin was specified, should this be legal? definitely confusing...
202 cx.origin = Some(Name::parse(&data, None)?);
203 State::StartLine
204 }
205 _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
206 }
207 }
208 State::Include(include_path) => match (t, include_path) {
209 (Token::CharData(data), None) => State::Include(Some(data)),
210 (Token::EOL, Some(include_path)) => {
211 // RFC1035 (section 5) does not specify how filename for $INCLUDE
212 // should be resolved into file path. The underlying code implements the
213 // following:
214 // * if the path is absolute (relies on Path::is_absolute), it uses normalized path
215 // * otherwise, it joins the path with parent root of the current file
216 //
217 // TODO: Inlining files specified using non-relative path might potentially introduce
218 // security issue in some cases (e.g. when working with zone files from untrusted sources)
219 // and should probably be configurable by user.
220
221 if stack > MAX_INCLUDE_LEVEL {
222 return Err(ParseErrorKind::Message(
223 "Max depth level for nested $INCLUDE is reached",
224 )
225 .into());
226 }
227
228 let include = Path::new(&include_path);
229 let include = match (include.is_absolute(), path) {
230 (true, _) => include.to_path_buf(),
231 (false, Some(path)) => path
232 .parent()
233 .expect("file has to have parent folder")
234 .join(include),
235 (false, None) => {
236 return Err(ParseErrorKind::Message(
237 "Relative $INCLUDE is not supported",
238 )
239 .into());
240 }
241 };
242
243 let input = fs::read_to_string(&include)?;
244 let lexer = Lexer::new(input);
245 self.lexers.push((lexer, Some(include)));
246 stack += 1;
247 state = State::StartLine;
248 continue 'outer;
249 }
250 (Token::CharData(_), Some(_)) => {
251 return Err(ParseErrorKind::Message(
252 "Domain name for $INCLUDE is not supported",
253 )
254 .into());
255 }
256 (t, _) => {
257 return Err(ParseErrorKind::UnexpectedToken(t).into());
258 }
259 },
260 State::TtlClassType => {
261 match t {
262 // if number, TTL
263 // Token::Number(num) => ttl = Some(*num),
264 // One of Class or Type (these cannot be overlapping!)
265 Token::CharData(mut data) => {
266 // if it's a number it's a ttl
267 let result: ParseResult<u32> = Self::parse_time(&data);
268 if result.is_ok() {
269 cx.ttl = result.ok();
270 State::TtlClassType // hm, should this go to just ClassType?
271 } else {
272 // if can parse DNSClass, then class
273 data.make_ascii_uppercase();
274 let result = DNSClass::from_str(&data);
275 if let Ok(parsed) = result {
276 cx.class = parsed;
277 State::TtlClassType
278 } else {
279 // if can parse RecordType, then RecordType
280 cx.rtype = Some(RecordType::from_str(&data)?);
281 State::Record(vec![])
282 }
283 }
284 }
285 // could be nothing if started with blank and is a comment, i.e. EOL
286 Token::EOL => {
287 State::StartLine // next line
288 }
289 _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
290 }
291 }
292 State::Record(record_parts) => {
293 // b/c of ownership rules, perhaps, just collect all the RData components as a list of
294 // tokens to pass into the processor
295 match t {
296 Token::EOL => {
297 cx.insert(record_parts)?;
298 State::StartLine
299 }
300 Token::CharData(part) => {
301 let mut record_parts = record_parts;
302 record_parts.push(part);
303 State::Record(record_parts)
304 }
305 // TODO: we should not tokenize the list...
306 Token::List(list) => {
307 let mut record_parts = record_parts;
308 record_parts.extend(list);
309 State::Record(record_parts)
310 }
311 _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
312 }
313 }
314 };
315 }
316
317 // Extra flush at the end for the case of missing endline
318 if let State::Record(record_parts) = mem::replace(&mut state, State::StartLine) {
319 cx.insert(record_parts)?;
320 }
321
322 stack -= 1;
323 self.lexers.pop();
324 }
325
326 //
327 // build the Authority and return.
328 let origin = cx.origin.ok_or_else(|| {
329 ParseError::from(ParseErrorKind::Message("$ORIGIN was not specified"))
330 })?;
331 Ok((origin, cx.records))
332 }
333
334 /// parses the string following the rules from:
335 /// <https://tools.ietf.org/html/rfc2308> (NXCaching RFC) and
336 /// <https://www.zytrax.com/books/dns/apa/time.html>
337 ///
338 /// default is seconds
339 /// #s = seconds = # x 1 seconds (really!)
340 /// #m = minutes = # x 60 seconds
341 /// #h = hours = # x 3600 seconds
342 /// #d = day = # x 86400 seconds
343 /// #w = week = # x 604800 seconds
344 ///
345 /// returns the result of the parsing or and error
346 ///
347 /// # Example
348 /// ```
349 /// use hickory_proto::serialize::txt::Parser;
350 ///
351 /// assert_eq!(Parser::parse_time("0").unwrap(), 0);
352 /// assert!(Parser::parse_time("s").is_err());
353 /// assert!(Parser::parse_time("").is_err());
354 /// assert_eq!(Parser::parse_time("0s").unwrap(), 0);
355 /// assert_eq!(Parser::parse_time("1").unwrap(), 1);
356 /// assert_eq!(Parser::parse_time("1S").unwrap(), 1);
357 /// assert_eq!(Parser::parse_time("1s").unwrap(), 1);
358 /// assert_eq!(Parser::parse_time("1M").unwrap(), 60);
359 /// assert_eq!(Parser::parse_time("1m").unwrap(), 60);
360 /// assert_eq!(Parser::parse_time("1H").unwrap(), 3600);
361 /// assert_eq!(Parser::parse_time("1h").unwrap(), 3600);
362 /// assert_eq!(Parser::parse_time("1D").unwrap(), 86400);
363 /// assert_eq!(Parser::parse_time("1d").unwrap(), 86400);
364 /// assert_eq!(Parser::parse_time("1W").unwrap(), 604800);
365 /// assert_eq!(Parser::parse_time("1w").unwrap(), 604800);
366 /// assert_eq!(Parser::parse_time("1s2d3w4h2m").unwrap(), 1+2*86400+3*604800+4*3600+2*60);
367 /// assert_eq!(Parser::parse_time("3w3w").unwrap(), 3*604800+3*604800);
368 /// assert!(Parser::parse_time("7102w").is_err());
369 /// ```
370 pub fn parse_time(ttl_str: &str) -> ParseResult<u32> {
371 if ttl_str.is_empty() {
372 return Err(ParseErrorKind::ParseTime(ttl_str.to_string()).into());
373 }
374
375 let (mut state, mut value) = (None, 0_u32);
376 for (i, c) in ttl_str.chars().enumerate() {
377 let start = match (state, c) {
378 (None, '0'..='9') => {
379 state = Some(i);
380 continue;
381 }
382 (Some(_), '0'..='9') => continue,
383 (Some(start), 'S' | 's' | 'M' | 'm' | 'H' | 'h' | 'D' | 'd' | 'W' | 'w') => start,
384 _ => return Err(ParseErrorKind::ParseTime(ttl_str.to_string()).into()),
385 };
386
387 // All allowed chars are ASCII, so using char indexes to slice &[u8] is OK
388 let number = u32::from_str(&ttl_str[start..i])
389 .map_err(|_| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
390
391 let multiplier = match c {
392 'S' | 's' => 1,
393 'M' | 'm' => 60,
394 'H' | 'h' => 3_600,
395 'D' | 'd' => 86_400,
396 'W' | 'w' => 604_800,
397 _ => unreachable!(),
398 };
399
400 value = number
401 .checked_mul(multiplier)
402 .and_then(|add| value.checked_add(add))
403 .ok_or_else(|| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
404
405 state = None;
406 }
407
408 if let Some(start) = state {
409 // All allowed chars are ASCII, so using char indexes to slice &[u8] is OK
410 let number = u32::from_str(&ttl_str[start..])
411 .map_err(|_| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
412 value = value
413 .checked_add(number)
414 .ok_or_else(|| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
415 }
416
417 Ok(value)
418 }
419}
420
421struct Context {
422 origin: Option<Name>,
423 records: BTreeMap<RrKey, RecordSet>,
424 class: DNSClass,
425 current_name: Option<Name>,
426 rtype: Option<RecordType>,
427 ttl: Option<u32>,
428}
429
430impl Context {
431 fn new(origin: Option<Name>) -> Self {
432 Self {
433 origin,
434 records: BTreeMap::default(),
435 class: DNSClass::IN,
436 current_name: None,
437 rtype: None,
438 ttl: None,
439 }
440 }
441
442 fn insert(&mut self, record_parts: Vec<String>) -> ParseResult<()> {
443 // call out to parsers for difference record types
444 // all tokens as part of the Record should be chardata...
445 let rtype = self
446 .rtype
447 .ok_or_else(|| ParseError::from("record type not specified"))?;
448
449 let rdata = RData::parse(
450 rtype,
451 record_parts.iter().map(AsRef::as_ref),
452 self.origin.as_ref(),
453 )?;
454
455 // verify that we have everything we need for the record
456 // TODO COW or RC would reduce mem usage, perhaps Name should have an intern()...
457 // might want to wait until RC.weak() stabilizes, as that would be needed for global
458 // memory where you want
459 let mut name = self
460 .current_name
461 .clone()
462 .ok_or_else(|| ParseError::from("record name not specified"))?;
463
464 // slightly annoying, need to grab the TTL, then move rdata into the record,
465 // then check the Type again and have custom add logic.
466 let set_ttl = match (rtype, self.ttl, &rdata) {
467 // TTL for the SOA is set internally...
468 // expire is for the SOA, minimum is default for records
469 (RecordType::SOA, _, RData::SOA(soa)) => {
470 // TODO, this looks wrong, get_expire() should be get_minimum(), right?
471 let set_ttl = soa.expire() as u32; // the spec seems a little inaccurate with u32 and i32
472 if self.ttl.is_none() {
473 self.ttl = Some(soa.minimum());
474 } // TODO: should this only set it if it's not set?
475 set_ttl
476 }
477 (RecordType::SOA, _, _) => {
478 return ParseResult::Err(ParseError::from(format!(
479 "invalid RData here, expected SOA: {rdata:?}"
480 )));
481 }
482 (_, Some(ttl), _) => ttl,
483 (_, None, _) => return Err(ParseError::from("record ttl not specified")),
484 };
485
486 // TODO: validate record, e.g. the name of SRV record allows _ but others do not.
487
488 // move the rdata into record...
489 name.set_fqdn(true);
490 let mut record = Record::from_rdata(name, set_ttl, rdata);
491 record.set_dns_class(self.class);
492
493 // add to the map
494 let entry = self.records.entry(RrKey::new(
495 LowerName::new(record.name()),
496 record.record_type(),
497 ));
498 match (rtype, entry) {
499 (RecordType::SOA, Entry::Occupied(_)) => {
500 return Err(ParseError::from("SOA is already specified"));
501 }
502 (_, Entry::Vacant(entry)) => {
503 entry.insert(RecordSet::from(record));
504 }
505 (_, Entry::Occupied(mut entry)) => {
506 entry.get_mut().insert(record, 0);
507 }
508 };
509
510 Ok(())
511 }
512}
513
/// Position of the zone file parser within the entry that is currently being read.
#[allow(unused)]
enum State {
    /// start of line, @, $<WORD>, Name, Blank
    StartLine,
    /// [<TTL>] [<class>] <type>,
    TtlClassType,
    /// $TTL <time>
    Ttl,
    /// RDATA fields collected so far for the record being read
    Record(Vec<String>),
    /// $INCLUDE <filename>; holds the file name once it has been read
    Include(Option<String>),
    /// $ORIGIN, waiting for the domain name
    Origin,
}
523
/// Limit on `$INCLUDE` nesting: `Parser::parse` rejects a further `$INCLUDE` once the number
/// of files on its lexer stack exceeds this value.
const MAX_INCLUDE_LEVEL: usize = 256;
526
#[cfg(test)]
mod tests {
    use alloc::string::ToString;

    use super::*;

    /// An entry whose type field is not a TTL, a class or a known record type must be
    /// rejected, and the error has to name the offending token (upper-cased by the parser).
    #[test]
    fn test_zone_parse() {
        let domain = Name::from_str("parameter.origin.org.").unwrap();

        // The record line starts with whitespace on purpose, presumably so that
        // `faulty-record-type` is read as the TTL/class/type field rather than as an owner name.
        let zone_data = r#"$ORIGIN parsed.zone.origin.org.
 faulty-record-type 60 IN A 1.2.3.4
"#;

        // Match instead of `is_err() & unwrap_err()`: the non-short-circuit `&` would panic
        // inside `unwrap_err` on success and hide the assertion message.
        match Parser::new(zone_data, None, Some(domain)).parse() {
            Ok(parsed) => panic!("unexpected success: {parsed:#?}"),
            Err(error) => {
                let message = error.to_string();
                assert!(
                    message.contains("FAULTY-RECORD-TYPE"),
                    "unexpected error: {message}"
                );
            }
        }
    }
}