hickory_proto/serialize/txt/
zone.rs

1// Copyright 2015-2023 Benjamin Fry <benjaminfry@me.com>
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// https://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// https://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8use alloc::{
9    borrow::Cow,
10    collections::btree_map::{BTreeMap, Entry},
11    string::{String, ToString},
12    vec::Vec,
13};
14use core::{mem, str::FromStr};
15use std::{
16    fs,
17    path::{Path, PathBuf},
18};
19
20use crate::{
21    rr::{DNSClass, LowerName, Name, RData, Record, RecordSet, RecordType, RrKey},
22    serialize::txt::{
23        ParseError, ParseErrorKind, ParseResult,
24        parse_rdata::RDataParser,
25        zone_lex::{Lexer, Token},
26    },
27};
28
29/// ```text
30/// 5. ZONE FILES
31///
32/// Zone files are text files that contain RRs in text form.  Since the
33/// contents of a zone can be expressed in the form of a list of RRs a
34/// Zone File is most often used to define a zone, though it can be used
35/// to list a cache's contents.  Hence, this section first discusses the
36/// format of RRs in a Zone File, and then the special considerations when
37/// a Zone File is used to create a zone in some name server.
38///
39/// 5.1. Format
40///
41/// The format of these files is a sequence of entries.  Entries are
42/// predominantly line-oriented, though parentheses can be used to continue
43/// a list of items across a line boundary, and text literals can contain
44/// CRLF within the text.  Any combination of tabs and spaces act as a
45/// delimiter between the separate items that make up an entry.  The end of
46/// any line in the Zone File can end with a comment.  The comment starts
47/// with a ";" (semicolon).
48///
49/// The following entries are defined:
50///
51///     <blank>[<comment>]
52///
53///     $ORIGIN <domain-name> [<comment>]
54///
55///     $INCLUDE <file-name> [<domain-name>] [<comment>]
56///
57///     <domain-name><rr> [<comment>]
58///
59///     <blank><rr> [<comment>]
60///
61/// Blank lines, with or without comments, are allowed anywhere in the file.
62///
63/// Two control entries are defined: $ORIGIN and $INCLUDE.  $ORIGIN is
64/// followed by a domain name, and resets the current origin for relative
65/// domain names to the stated name.  $INCLUDE inserts the named file into
66/// the current file, and may optionally specify a domain name that sets the
67/// relative domain name origin for the included file.  $INCLUDE may also
68/// have a comment.  Note that a $INCLUDE entry never changes the relative
69/// origin of the parent file, regardless of changes to the relative origin
70/// made within the included file.
71///
72/// The last two forms represent RRs.  If an entry for an RR begins with a
73/// blank, then the RR is assumed to be owned by the last stated owner.  If
74/// an RR entry begins with a <domain-name>, then the owner name is reset.
75///
76/// <rr> contents take one of the following forms:
77///
78///     [<TTL>] [<class>] <type> <RDATA>
79///
80///     [<class>] [<TTL>] <type> <RDATA>
81///
82/// The RR begins with optional TTL and class fields, followed by a type and
83/// RDATA field appropriate to the type and class.  Class and type use the
84/// standard mnemonics, TTL is a decimal integer.  Omitted class and TTL
85/// values are default to the last explicitly stated values.  Since type and
86/// class mnemonics are disjoint, the parse is unique.  (Note that this
87/// order is different from the order used in examples and the order used in
88/// the actual RRs; the given order allows easier parsing and defaulting.)
89///
90/// <domain-name>s make up a large share of the data in the Zone File.
91/// The labels in the domain name are expressed as character strings and
92/// separated by dots.  Quoting conventions allow arbitrary characters to be
93/// stored in domain names.  Domain names that end in a dot are called
94/// absolute, and are taken as complete.  Domain names which do not end in a
95/// dot are called relative; the actual domain name is the concatenation of
96/// the relative part with an origin specified in a $ORIGIN, $INCLUDE, or as
97/// an argument to the Zone File loading routine.  A relative name is an
98/// error when no origin is available.
99///
100/// <character-string> is expressed in one or two ways: as a contiguous set
101/// of characters without interior spaces, or as a string beginning with a "
102/// and ending with a ".  Inside a " delimited string any character can
103/// occur, except for a " itself, which must be quoted using \ (back slash).
104///
105/// Because these files are text files several special encodings are
106/// necessary to allow arbitrary data to be loaded.  In particular:
107///
108///                 of the root.
109///
110/// @               A free standing @ is used to denote the current origin.
111///
112/// \X              where X is any character other than a digit (0-9), is
113///                 used to quote that character so that its special meaning
114///                 does not apply.  For example, "\." can be used to place
115///                 a dot character in a label.
116///
117/// \DDD            where each D is a digit is the octet corresponding to
118///                 the decimal number described by DDD.  The resulting
119///                 octet is assumed to be text and is not checked for
120///                 special meaning.
121///
122/// ( )             Parentheses are used to group data that crosses a line
123///                 boundary.  In effect, line terminations are not
124///                 recognized within parentheses.
125///
126/// ;               Semicolon is used to start a comment; the remainder of
127///                 the line is ignored.
128/// ```
129pub struct Parser<'a> {
130    lexers: Vec<(Lexer<'a>, Option<PathBuf>)>,
131    origin: Option<Name>,
132}
133
134impl<'a> Parser<'a> {
135    /// Returns a new Zone file parser
136    ///
137    /// The `path` argument's parent directory is used to resolve relative `$INCLUDE` paths.
138    /// Relative `$INCLUDE` paths will yield an error if `path` is `None`.
139    pub fn new(
140        input: impl Into<Cow<'a, str>>,
141        path: Option<PathBuf>,
142        origin: Option<Name>,
143    ) -> Self {
144        Self {
145            lexers: vec![(Lexer::new(input), path)],
146            origin,
147        }
148    }
149
150    /// Parse a file from the Lexer
151    ///
152    /// # Return
153    ///
154    /// A pair of the Zone origin name and a map of all Keys to RecordSets
155    pub fn parse(mut self) -> ParseResult<(Name, BTreeMap<RrKey, RecordSet>)> {
156        let mut cx = Context::new(self.origin);
157        let mut state = State::StartLine;
158        let mut stack = self.lexers.len();
159
160        'outer: while let Some((lexer, path)) = self.lexers.last_mut() {
161            while let Some(t) = lexer.next_token()? {
162                state = match state {
163                    State::StartLine => {
164                        // current_name is not reset on the next line b/c it might be needed from the previous
165                        cx.rtype = None;
166
167                        match t {
168                            // if Dollar, then $INCLUDE or $ORIGIN
169                            Token::Include => State::Include(None),
170                            Token::Origin => State::Origin,
171                            Token::Ttl => State::Ttl,
172
173                            // if CharData, then Name then ttl_class_type
174                            Token::CharData(data) => {
175                                cx.current_name = Some(Name::parse(&data, cx.origin.as_ref())?);
176                                State::TtlClassType
177                            }
178
179                            // @ is a placeholder for specifying the current origin
180                            Token::At => {
181                                cx.current_name.clone_from(&cx.origin); // TODO a COW or RC would reduce copies...
182                                State::TtlClassType
183                            }
184
185                            // if blank, then nothing or ttl_class_type
186                            Token::Blank => State::TtlClassType,
187                            Token::EOL => State::StartLine, // probably a comment
188                            _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
189                        }
190                    }
191                    State::Ttl => match t {
192                        Token::CharData(data) => {
193                            cx.ttl = Some(Self::parse_time(&data)?);
194                            State::StartLine
195                        }
196                        _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
197                    },
198                    State::Origin => {
199                        match t {
200                            Token::CharData(data) => {
201                                // TODO an origin was specified, should this be legal? definitely confusing...
202                                cx.origin = Some(Name::parse(&data, None)?);
203                                State::StartLine
204                            }
205                            _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
206                        }
207                    }
208                    State::Include(include_path) => match (t, include_path) {
209                        (Token::CharData(data), None) => State::Include(Some(data)),
210                        (Token::EOL, Some(include_path)) => {
211                            // RFC1035 (section 5) does not specify how filename for $INCLUDE
212                            // should be resolved into file path. The underlying code implements the
213                            // following:
214                            // * if the path is absolute (relies on Path::is_absolute), it uses normalized path
215                            // * otherwise, it joins the path with parent root of the current file
216                            //
217                            // TODO: Inlining files specified using non-relative path might potentially introduce
218                            // security issue in some cases (e.g. when working with zone files from untrusted sources)
219                            // and should probably be configurable by user.
220
221                            if stack > MAX_INCLUDE_LEVEL {
222                                return Err(ParseErrorKind::Message(
223                                    "Max depth level for nested $INCLUDE is reached",
224                                )
225                                .into());
226                            }
227
228                            let include = Path::new(&include_path);
229                            let include = match (include.is_absolute(), path) {
230                                (true, _) => include.to_path_buf(),
231                                (false, Some(path)) => path
232                                    .parent()
233                                    .expect("file has to have parent folder")
234                                    .join(include),
235                                (false, None) => {
236                                    return Err(ParseErrorKind::Message(
237                                        "Relative $INCLUDE is not supported",
238                                    )
239                                    .into());
240                                }
241                            };
242
243                            let input = fs::read_to_string(&include)?;
244                            let lexer = Lexer::new(input);
245                            self.lexers.push((lexer, Some(include)));
246                            stack += 1;
247                            state = State::StartLine;
248                            continue 'outer;
249                        }
250                        (Token::CharData(_), Some(_)) => {
251                            return Err(ParseErrorKind::Message(
252                                "Domain name for $INCLUDE is not supported",
253                            )
254                            .into());
255                        }
256                        (t, _) => {
257                            return Err(ParseErrorKind::UnexpectedToken(t).into());
258                        }
259                    },
260                    State::TtlClassType => {
261                        match t {
262                            // if number, TTL
263                            // Token::Number(num) => ttl = Some(*num),
264                            // One of Class or Type (these cannot be overlapping!)
265                            Token::CharData(mut data) => {
266                                // if it's a number it's a ttl
267                                let result: ParseResult<u32> = Self::parse_time(&data);
268                                if result.is_ok() {
269                                    cx.ttl = result.ok();
270                                    State::TtlClassType // hm, should this go to just ClassType?
271                                } else {
272                                    // if can parse DNSClass, then class
273                                    data.make_ascii_uppercase();
274                                    let result = DNSClass::from_str(&data);
275                                    if let Ok(parsed) = result {
276                                        cx.class = parsed;
277                                        State::TtlClassType
278                                    } else {
279                                        // if can parse RecordType, then RecordType
280                                        cx.rtype = Some(RecordType::from_str(&data)?);
281                                        State::Record(vec![])
282                                    }
283                                }
284                            }
285                            // could be nothing if started with blank and is a comment, i.e. EOL
286                            Token::EOL => {
287                                State::StartLine // next line
288                            }
289                            _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
290                        }
291                    }
292                    State::Record(record_parts) => {
293                        // b/c of ownership rules, perhaps, just collect all the RData components as a list of
294                        //  tokens to pass into the processor
295                        match t {
296                            Token::EOL => {
297                                cx.insert(record_parts)?;
298                                State::StartLine
299                            }
300                            Token::CharData(part) => {
301                                let mut record_parts = record_parts;
302                                record_parts.push(part);
303                                State::Record(record_parts)
304                            }
305                            // TODO: we should not tokenize the list...
306                            Token::List(list) => {
307                                let mut record_parts = record_parts;
308                                record_parts.extend(list);
309                                State::Record(record_parts)
310                            }
311                            _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
312                        }
313                    }
314                };
315            }
316
317            // Extra flush at the end for the case of missing endline
318            if let State::Record(record_parts) = mem::replace(&mut state, State::StartLine) {
319                cx.insert(record_parts)?;
320            }
321
322            stack -= 1;
323            self.lexers.pop();
324        }
325
326        //
327        // build the Authority and return.
328        let origin = cx.origin.ok_or_else(|| {
329            ParseError::from(ParseErrorKind::Message("$ORIGIN was not specified"))
330        })?;
331        Ok((origin, cx.records))
332    }
333
334    /// parses the string following the rules from:
335    ///  <https://tools.ietf.org/html/rfc2308> (NXCaching RFC) and
336    ///  <https://www.zytrax.com/books/dns/apa/time.html>
337    ///
338    /// default is seconds
339    /// #s = seconds = # x 1 seconds (really!)
340    /// #m = minutes = # x 60 seconds
341    /// #h = hours   = # x 3600 seconds
342    /// #d = day     = # x 86400 seconds
343    /// #w = week    = # x 604800 seconds
344    ///
345    /// returns the result of the parsing or and error
346    ///
347    /// # Example
348    /// ```
349    /// use hickory_proto::serialize::txt::Parser;
350    ///
351    /// assert_eq!(Parser::parse_time("0").unwrap(),  0);
352    /// assert!(Parser::parse_time("s").is_err());
353    /// assert!(Parser::parse_time("").is_err());
354    /// assert_eq!(Parser::parse_time("0s").unwrap(), 0);
355    /// assert_eq!(Parser::parse_time("1").unwrap(),  1);
356    /// assert_eq!(Parser::parse_time("1S").unwrap(), 1);
357    /// assert_eq!(Parser::parse_time("1s").unwrap(), 1);
358    /// assert_eq!(Parser::parse_time("1M").unwrap(), 60);
359    /// assert_eq!(Parser::parse_time("1m").unwrap(), 60);
360    /// assert_eq!(Parser::parse_time("1H").unwrap(), 3600);
361    /// assert_eq!(Parser::parse_time("1h").unwrap(), 3600);
362    /// assert_eq!(Parser::parse_time("1D").unwrap(), 86400);
363    /// assert_eq!(Parser::parse_time("1d").unwrap(), 86400);
364    /// assert_eq!(Parser::parse_time("1W").unwrap(), 604800);
365    /// assert_eq!(Parser::parse_time("1w").unwrap(), 604800);
366    /// assert_eq!(Parser::parse_time("1s2d3w4h2m").unwrap(), 1+2*86400+3*604800+4*3600+2*60);
367    /// assert_eq!(Parser::parse_time("3w3w").unwrap(), 3*604800+3*604800);
368    /// assert!(Parser::parse_time("7102w").is_err());
369    /// ```
370    pub fn parse_time(ttl_str: &str) -> ParseResult<u32> {
371        if ttl_str.is_empty() {
372            return Err(ParseErrorKind::ParseTime(ttl_str.to_string()).into());
373        }
374
375        let (mut state, mut value) = (None, 0_u32);
376        for (i, c) in ttl_str.chars().enumerate() {
377            let start = match (state, c) {
378                (None, '0'..='9') => {
379                    state = Some(i);
380                    continue;
381                }
382                (Some(_), '0'..='9') => continue,
383                (Some(start), 'S' | 's' | 'M' | 'm' | 'H' | 'h' | 'D' | 'd' | 'W' | 'w') => start,
384                _ => return Err(ParseErrorKind::ParseTime(ttl_str.to_string()).into()),
385            };
386
387            // All allowed chars are ASCII, so using char indexes to slice &[u8] is OK
388            let number = u32::from_str(&ttl_str[start..i])
389                .map_err(|_| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
390
391            let multiplier = match c {
392                'S' | 's' => 1,
393                'M' | 'm' => 60,
394                'H' | 'h' => 3_600,
395                'D' | 'd' => 86_400,
396                'W' | 'w' => 604_800,
397                _ => unreachable!(),
398            };
399
400            value = number
401                .checked_mul(multiplier)
402                .and_then(|add| value.checked_add(add))
403                .ok_or_else(|| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
404
405            state = None;
406        }
407
408        if let Some(start) = state {
409            // All allowed chars are ASCII, so using char indexes to slice &[u8] is OK
410            let number = u32::from_str(&ttl_str[start..])
411                .map_err(|_| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
412            value = value
413                .checked_add(number)
414                .ok_or_else(|| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
415        }
416
417        Ok(value)
418    }
419}
420
421struct Context {
422    origin: Option<Name>,
423    records: BTreeMap<RrKey, RecordSet>,
424    class: DNSClass,
425    current_name: Option<Name>,
426    rtype: Option<RecordType>,
427    ttl: Option<u32>,
428}
429
430impl Context {
431    fn new(origin: Option<Name>) -> Self {
432        Self {
433            origin,
434            records: BTreeMap::default(),
435            class: DNSClass::IN,
436            current_name: None,
437            rtype: None,
438            ttl: None,
439        }
440    }
441
442    fn insert(&mut self, record_parts: Vec<String>) -> ParseResult<()> {
443        // call out to parsers for difference record types
444        // all tokens as part of the Record should be chardata...
445        let rtype = self
446            .rtype
447            .ok_or_else(|| ParseError::from("record type not specified"))?;
448
449        let rdata = RData::parse(
450            rtype,
451            record_parts.iter().map(AsRef::as_ref),
452            self.origin.as_ref(),
453        )?;
454
455        // verify that we have everything we need for the record
456        // TODO COW or RC would reduce mem usage, perhaps Name should have an intern()...
457        //  might want to wait until RC.weak() stabilizes, as that would be needed for global
458        //  memory where you want
459        let mut name = self
460            .current_name
461            .clone()
462            .ok_or_else(|| ParseError::from("record name not specified"))?;
463
464        // slightly annoying, need to grab the TTL, then move rdata into the record,
465        //  then check the Type again and have custom add logic.
466        let set_ttl = match (rtype, self.ttl, &rdata) {
467            // TTL for the SOA is set internally...
468            // expire is for the SOA, minimum is default for records
469            (RecordType::SOA, _, RData::SOA(soa)) => {
470                // TODO, this looks wrong, get_expire() should be get_minimum(), right?
471                let set_ttl = soa.expire() as u32; // the spec seems a little inaccurate with u32 and i32
472                if self.ttl.is_none() {
473                    self.ttl = Some(soa.minimum());
474                } // TODO: should this only set it if it's not set?
475                set_ttl
476            }
477            (RecordType::SOA, _, _) => {
478                return ParseResult::Err(ParseError::from(format!(
479                    "invalid RData here, expected SOA: {rdata:?}"
480                )));
481            }
482            (_, Some(ttl), _) => ttl,
483            (_, None, _) => return Err(ParseError::from("record ttl not specified")),
484        };
485
486        // TODO: validate record, e.g. the name of SRV record allows _ but others do not.
487
488        // move the rdata into record...
489        name.set_fqdn(true);
490        let mut record = Record::from_rdata(name, set_ttl, rdata);
491        record.set_dns_class(self.class);
492
493        // add to the map
494        let entry = self.records.entry(RrKey::new(
495            LowerName::new(record.name()),
496            record.record_type(),
497        ));
498        match (rtype, entry) {
499            (RecordType::SOA, Entry::Occupied(_)) => {
500                return Err(ParseError::from("SOA is already specified"));
501            }
502            (_, Entry::Vacant(entry)) => {
503                entry.insert(RecordSet::from(record));
504            }
505            (_, Entry::Occupied(mut entry)) => {
506                entry.get_mut().insert(record, 0);
507            }
508        };
509
510        Ok(())
511    }
512}
513
/// States of the zone file parser: what the next token is expected to be.
#[allow(unused)]
enum State {
    /// start of line, @, $<WORD>, Name, Blank
    StartLine,
    /// [<TTL>] [<class>] <type>,
    TtlClassType,
    /// $TTL <time>
    Ttl,
    /// RDATA tokens collected so far for the record on the current line
    Record(Vec<String>),
    /// $INCLUDE <filename>; holds the filename once it has been read
    Include(Option<String>),
    /// $ORIGIN <domain-name>
    Origin,
}
523
/// Max traversal depth for $INCLUDE files
///
/// Checked against the number of lexers on the parser's stack before another included file
/// is opened; exceeding it aborts parsing with an error.
const MAX_INCLUDE_LEVEL: usize = 256;
526
#[cfg(test)]
mod tests {
    use alloc::string::ToString;

    use super::*;

    /// An unknown record type must fail to parse, and the error must name the offending
    /// (upper-cased) token.
    #[test]
    fn test_zone_parse() {
        let domain = Name::from_str("parameter.origin.org.").unwrap();

        // The leading blank on the record line means "reuse the previous owner", so
        // `faulty-record-type` is read as the TTL/class/type field.
        let zone_data = r#"$ORIGIN parsed.zone.origin.org.
 faulty-record-type 60 IN A 1.2.3.4
"#;

        let result = Parser::new(zone_data, None, Some(domain)).parse();
        // Match instead of `is_err() & unwrap_err()`: the non-short-circuit `&` made an
        // unexpected success panic inside `unwrap_err` before the message could be shown.
        match result {
            Err(err) => assert!(
                err.to_string().contains("FAULTY-RECORD-TYPE"),
                "unexpected error: {err}"
            ),
            Ok(parsed) => panic!("unexpected success: {parsed:#?}"),
        }
    }
}