trust_dns_proto/serialize/txt/
zone.rs

1// Copyright 2015-2023 Benjamin Fry <benjaminfry@me.com>
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// http://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8use std::{collections::BTreeMap, str::FromStr};
9
10use crate::{
11    rr::{DNSClass, LowerName, Name, RData, Record, RecordSet, RecordType, RrKey},
12    serialize::txt::{
13        parse_rdata::RDataParser,
14        zone_lex::{Lexer, Token},
15        ParseError, ParseErrorKind, ParseResult,
16    },
17};
18
/// A parser for DNS zone files in their text form.
///
/// The parser itself carries no state; all parsing state lives inside a single call to
/// [`Parser::parse`]. Besides the entries described in the quoted text below, `parse` also
/// accepts a `$TTL` entry, and it rejects `$INCLUDE` with an error.
///
/// The format description that follows is adapted from RFC 1035, section 5.
///
/// ```text
/// 5. ZONE FILES
///
/// Zone files are text files that contain RRs in text form.  Since the
/// contents of a zone can be expressed in the form of a list of RRs a
/// Zone File is most often used to define a zone, though it can be used
/// to list a cache's contents.  Hence, this section first discusses the
/// format of RRs in a Zone File, and then the special considerations when
/// a Zone File is used to create a zone in some name server.
///
/// 5.1. Format
///
/// The format of these files is a sequence of entries.  Entries are
/// predominantly line-oriented, though parentheses can be used to continue
/// a list of items across a line boundary, and text literals can contain
/// CRLF within the text.  Any combination of tabs and spaces act as a
/// delimiter between the separate items that make up an entry.  The end of
/// any line in the Zone File can end with a comment.  The comment starts
/// with a ";" (semicolon).
///
/// The following entries are defined:
///
///     <blank>[<comment>]
///
///     $ORIGIN <domain-name> [<comment>]
///
///     $INCLUDE <file-name> [<domain-name>] [<comment>]
///
///     <domain-name><rr> [<comment>]
///
///     <blank><rr> [<comment>]
///
/// Blank lines, with or without comments, are allowed anywhere in the file.
///
/// Two control entries are defined: $ORIGIN and $INCLUDE.  $ORIGIN is
/// followed by a domain name, and resets the current origin for relative
/// domain names to the stated name.  $INCLUDE inserts the named file into
/// the current file, and may optionally specify a domain name that sets the
/// relative domain name origin for the included file.  $INCLUDE may also
/// have a comment.  Note that a $INCLUDE entry never changes the relative
/// origin of the parent file, regardless of changes to the relative origin
/// made within the included file.
///
/// The last two forms represent RRs.  If an entry for an RR begins with a
/// blank, then the RR is assumed to be owned by the last stated owner.  If
/// an RR entry begins with a <domain-name>, then the owner name is reset.
///
/// <rr> contents take one of the following forms:
///
///     [<TTL>] [<class>] <type> <RDATA>
///
///     [<class>] [<TTL>] <type> <RDATA>
///
/// The RR begins with optional TTL and class fields, followed by a type and
/// RDATA field appropriate to the type and class.  Class and type use the
/// standard mnemonics, TTL is a decimal integer.  Omitted class and TTL
/// values are default to the last explicitly stated values.  Since type and
/// class mnemonics are disjoint, the parse is unique.  (Note that this
/// order is different from the order used in examples and the order used in
/// the actual RRs; the given order allows easier parsing and defaulting.)
///
/// <domain-name>s make up a large share of the data in the Zone File.
/// The labels in the domain name are expressed as character strings and
/// separated by dots.  Quoting conventions allow arbitrary characters to be
/// stored in domain names.  Domain names that end in a dot are called
/// absolute, and are taken as complete.  Domain names which do not end in a
/// dot are called relative; the actual domain name is the concatenation of
/// the relative part with an origin specified in a $ORIGIN, $INCLUDE, or as
/// an argument to the Zone File loading routine.  A relative name is an
/// error when no origin is available.
///
/// <character-string> is expressed in one or two ways: as a contiguous set
/// of characters without interior spaces, or as a string beginning with a "
/// and ending with a ".  Inside a " delimited string any character can
/// occur, except for a " itself, which must be quoted using \ (back slash).
///
/// Because these files are text files several special encodings are
/// necessary to allow arbitrary data to be loaded.  In particular:
///
///                 of the root.
///
/// @               A free standing @ is used to denote the current origin.
///
/// \X              where X is any character other than a digit (0-9), is
///                 used to quote that character so that its special meaning
///                 does not apply.  For example, "\." can be used to place
///                 a dot character in a label.
///
/// \DDD            where each D is a digit is the octet corresponding to
///                 the decimal number described by DDD.  The resulting
///                 octet is assumed to be text and is not checked for
///                 special meaning.
///
/// ( )             Parentheses are used to group data that crosses a line
///                 boundary.  In effect, line terminations are not
///                 recognized within parentheses.
///
/// ;               Semicolon is used to start a comment; the remainder of
///                 the line is ignored.
/// ```
#[derive(Clone, Copy, Debug, Default)]
pub struct Parser;
121
122impl Parser {
123    /// Returns a new Zone file parser
124    pub fn new() -> Self {
125        Self
126    }
127
128    /// Parse a file from the Lexer
129    ///
130    /// # Return
131    ///
132    /// A pair of the Zone origin name and a map of all Keys to RecordSets
133    pub fn parse(
134        &mut self,
135        lexer: Lexer<'_>,
136        origin: Option<Name>,
137    ) -> ParseResult<(Name, BTreeMap<RrKey, RecordSet>)> {
138        let mut lexer = lexer;
139        let mut records: BTreeMap<RrKey, RecordSet> = BTreeMap::new();
140
141        let mut origin: Option<Name> = origin;
142        let mut class: DNSClass = DNSClass::IN;
143        let mut current_name: Option<Name> = None;
144        let mut rtype: Option<RecordType> = None;
145        let mut ttl: Option<u32> = None;
146        let mut state = State::StartLine;
147
148        while let Some(t) = lexer.next_token()? {
149            state = match state {
150                State::StartLine => {
151                    // current_name is not reset on the next line b/c it might be needed from the previous
152                    rtype = None;
153
154                    match t {
155                        // if Dollar, then $INCLUDE or $ORIGIN
156                        Token::Include => {
157                            return Err(ParseError::from(ParseErrorKind::Message("The parser does not support $INCLUDE. Consider inlining file before parsing")))
158                        },
159                        Token::Origin => State::Origin,
160                        Token::Ttl => State::Ttl,
161
162                        // if CharData, then Name then ttl_class_type
163                        Token::CharData(data) => {
164                            current_name = Some(Name::parse(&data, origin.as_ref())?);
165                            State::TtlClassType
166                        }
167
168                        // @ is a placeholder for specifying the current origin
169                        Token::At => {
170                            current_name = origin.clone(); // TODO a COW or RC would reduce copies...
171                            State::TtlClassType
172                        }
173
174                        // if blank, then nothing or ttl_class_type
175                        Token::Blank => State::TtlClassType,
176                        Token::EOL => State::StartLine, // probably a comment
177                        _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
178                    }
179                }
180                State::Ttl => match t {
181                    Token::CharData(data) => {
182                        ttl = Some(Self::parse_time(&data)?);
183                        State::StartLine
184                    }
185                    _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
186                },
187                State::Origin => {
188                    match t {
189                        Token::CharData(data) => {
190                            // TODO an origin was specified, should this be legal? definitely confusing...
191                            origin = Some(Name::parse(&data, None)?);
192                            State::StartLine
193                        }
194                        _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
195                    }
196                }
197                State::Include => return Err(ParseError::from(ParseErrorKind::Message(
198                    "The parser does not support $INCLUDE. Consider inlining file before parsing",
199                ))),
200                State::TtlClassType => {
201                    match t {
202                        // if number, TTL
203                        // Token::Number(ref num) => ttl = Some(*num),
204                        // One of Class or Type (these cannot be overlapping!)
205                        Token::CharData(mut data) => {
206                            // if it's a number it's a ttl
207                            let result: ParseResult<u32> = Self::parse_time(&data);
208                            if result.is_ok() {
209                                ttl = result.ok();
210                                State::TtlClassType // hm, should this go to just ClassType?
211                            } else {
212                                // if can parse DNSClass, then class
213                                data.make_ascii_uppercase();
214                                let result = DNSClass::from_str(&data);
215                                if let Ok(parsed) = result {
216                                    class = parsed;
217                                    State::TtlClassType
218                                } else {
219                                    // if can parse RecordType, then RecordType
220                                    rtype = Some(RecordType::from_str(&data)?);
221                                    State::Record(vec![])
222                                }
223                            }
224                        }
225                        // could be nothing if started with blank and is a comment, i.e. EOL
226                        Token::EOL => {
227                            State::StartLine // next line
228                        }
229                        _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
230                    }
231                }
232                State::Record(record_parts) => {
233                    // b/c of ownership rules, perhaps, just collect all the RData components as a list of
234                    //  tokens to pass into the processor
235                    match t {
236                        Token::EOL => {
237                            Self::flush_record(
238                                record_parts,
239                                &origin,
240                                &current_name,
241                                rtype,
242                                &mut ttl,
243                                class,
244                                &mut records,
245                            )?;
246                            State::StartLine
247                        }
248                        Token::CharData(part) => {
249                            let mut record_parts = record_parts;
250                            record_parts.push(part);
251                            State::Record(record_parts)
252                        }
253                        // TODO: we should not tokenize the list...
254                        Token::List(list) => {
255                            let mut record_parts = record_parts;
256                            record_parts.extend(list);
257                            State::Record(record_parts)
258                        }
259                        _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
260                    }
261                }
262            }
263        }
264
265        //Extra flush at the end for the case of missing endline
266        if let State::Record(record_parts) = state {
267            Self::flush_record(
268                record_parts,
269                &origin,
270                &current_name,
271                rtype,
272                &mut ttl,
273                class,
274                &mut records,
275            )?;
276        }
277
278        //
279        // build the Authority and return.
280        let origin = origin.ok_or_else(|| {
281            ParseError::from(ParseErrorKind::Message("$ORIGIN was not specified"))
282        })?;
283        Ok((origin, records))
284    }
285
286    fn flush_record(
287        record_parts: Vec<String>,
288        origin: &Option<Name>,
289        current_name: &Option<Name>,
290        rtype: Option<RecordType>,
291        ttl: &mut Option<u32>,
292        class: DNSClass,
293        records: &mut BTreeMap<RrKey, RecordSet>,
294    ) -> ParseResult<()> {
295        // call out to parsers for difference record types
296        // all tokens as part of the Record should be chardata...
297        let rtype = rtype.ok_or_else(|| {
298            ParseError::from(ParseErrorKind::Message("record type not specified"))
299        })?;
300        let rdata = RData::parse(
301            rtype,
302            record_parts.iter().map(AsRef::as_ref),
303            origin.as_ref(),
304        )?;
305
306        // verify that we have everything we need for the record
307        let mut record = Record::new();
308        // TODO COW or RC would reduce mem usage, perhaps Name should have an intern()...
309        //  might want to wait until RC.weak() stabilizes, as that would be needed for global
310        //  memory where you want
311        record.set_name(current_name.clone().ok_or_else(|| {
312            ParseError::from(ParseErrorKind::Message("record name not specified"))
313        })?);
314        record.set_rr_type(rtype);
315        record.set_dns_class(class);
316
317        // slightly annoying, need to grab the TTL, then move rdata into the record,
318        //  then check the Type again and have custom add logic.
319        match rtype {
320            RecordType::SOA => {
321                // TTL for the SOA is set internally...
322                // expire is for the SOA, minimum is default for records
323                if let RData::SOA(ref soa) = rdata {
324                    // TODO, this looks wrong, get_expire() should be get_minimum(), right?
325                    record.set_ttl(soa.expire() as u32); // the spec seems a little inaccurate with u32 and i32
326                    if ttl.is_none() {
327                        *ttl = Some(soa.minimum());
328                    } // TODO: should this only set it if it's not set?
329                } else {
330                    let msg = format!("Invalid RData here, expected SOA: {rdata:?}");
331                    return ParseResult::Err(ParseError::from(ParseErrorKind::Msg(msg)));
332                }
333            }
334            _ => {
335                record.set_ttl(ttl.ok_or_else(|| {
336                    ParseError::from(ParseErrorKind::Message("record ttl not specified"))
337                })?);
338            }
339        }
340
341        // TODO: validate record, e.g. the name of SRV record allows _ but others do not.
342
343        // move the rdata into record...
344        record.set_data(Some(rdata));
345
346        // add to the map
347        let key = RrKey::new(LowerName::new(record.name()), record.record_type());
348        match rtype {
349            RecordType::SOA => {
350                let set = record.into();
351                if records.insert(key, set).is_some() {
352                    return Err(ParseErrorKind::Message("SOA is already specified").into());
353                }
354            }
355            _ => {
356                // add a Vec if it's not there, then add the record to the list
357                let set = records
358                    .entry(key)
359                    .or_insert_with(|| RecordSet::new(record.name(), record.record_type(), 0));
360                set.insert(record, 0);
361            }
362        }
363        Ok(())
364    }
365
366    /// parses the string following the rules from:
367    ///  <https://tools.ietf.org/html/rfc2308> (NXCaching RFC) and
368    ///  <http://www.zytrax.com/books/dns/apa/time.html>
369    ///
370    /// default is seconds
371    /// #s = seconds = # x 1 seconds (really!)
372    /// #m = minutes = # x 60 seconds
373    /// #h = hours   = # x 3600 seconds
374    /// #d = day     = # x 86400 seconds
375    /// #w = week    = # x 604800 seconds
376    ///
377    /// returns the result of the parsing or and error
378    ///
379    /// # Example
380    /// ```
381    /// use trust_dns_proto::serialize::txt::Parser;
382    ///
383    /// assert_eq!(Parser::parse_time("0").unwrap(),  0);
384    /// assert!(Parser::parse_time("s").is_err());
385    /// assert!(Parser::parse_time("").is_err());
386    /// assert_eq!(Parser::parse_time("0s").unwrap(), 0);
387    /// assert_eq!(Parser::parse_time("1").unwrap(),  1);
388    /// assert_eq!(Parser::parse_time("1S").unwrap(), 1);
389    /// assert_eq!(Parser::parse_time("1s").unwrap(), 1);
390    /// assert_eq!(Parser::parse_time("1M").unwrap(), 60);
391    /// assert_eq!(Parser::parse_time("1m").unwrap(), 60);
392    /// assert_eq!(Parser::parse_time("1H").unwrap(), 3600);
393    /// assert_eq!(Parser::parse_time("1h").unwrap(), 3600);
394    /// assert_eq!(Parser::parse_time("1D").unwrap(), 86400);
395    /// assert_eq!(Parser::parse_time("1d").unwrap(), 86400);
396    /// assert_eq!(Parser::parse_time("1W").unwrap(), 604800);
397    /// assert_eq!(Parser::parse_time("1w").unwrap(), 604800);
398    /// assert_eq!(Parser::parse_time("1s2d3w4h2m").unwrap(), 1+2*86400+3*604800+4*3600+2*60);
399    /// assert_eq!(Parser::parse_time("3w3w").unwrap(), 3*604800+3*604800);
400    /// assert!(Parser::parse_time("7102w").is_err());
401    /// ```
402    pub fn parse_time(ttl_str: &str) -> ParseResult<u32> {
403        if ttl_str.is_empty() {
404            return Err(ParseErrorKind::ParseTime(ttl_str.to_string()).into());
405        }
406
407        let (mut state, mut value) = (None, 0_u32);
408        for (i, c) in ttl_str.chars().enumerate() {
409            let start = match (state, c) {
410                (None, '0'..='9') => {
411                    state = Some(i);
412                    continue;
413                }
414                (Some(_), '0'..='9') => continue,
415                (Some(start), 'S' | 's' | 'M' | 'm' | 'H' | 'h' | 'D' | 'd' | 'W' | 'w') => start,
416                _ => return Err(ParseErrorKind::ParseTime(ttl_str.to_string()).into()),
417            };
418
419            // All allowed chars are ASCII, so using char indexes to slice &[u8] is OK
420            let number = u32::from_str(&ttl_str[start..i])
421                .map_err(|_| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
422
423            let multiplier = match c {
424                'S' | 's' => 1,
425                'M' | 'm' => 60,
426                'H' | 'h' => 3_600,
427                'D' | 'd' => 86_400,
428                'W' | 'w' => 604_800,
429                _ => unreachable!(),
430            };
431
432            value = number
433                .checked_mul(multiplier)
434                .and_then(|add| value.checked_add(add))
435                .ok_or_else(|| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
436
437            state = None;
438        }
439
440        if let Some(start) = state {
441            // All allowed chars are ASCII, so using char indexes to slice &[u8] is OK
442            let number = u32::from_str(&ttl_str[start..])
443                .map_err(|_| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
444            value = value
445                .checked_add(number)
446                .ok_or_else(|| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
447        }
448
449        Ok(value)
450    }
451}
452
/// The position of the zone file parser within the entry it is currently reading.
///
/// `Parser::parse` starts in `StartLine` and returns to it after every completed entry.
// `Include` is matched by the parser but never constructed in this file (`$INCLUDE` is
// rejected as soon as its token is seen), hence the `allow(unused)`.
#[allow(unused)]
enum State {
    StartLine,    // start of line, @, $<WORD>, Name, Blank
    TtlClassType, // [<TTL>] [<class>] <type>,
    Ttl,          // $TTL <time>
    // RDATA of a record; holds the character-data pieces collected so far
    Record(Vec<String>),
    Include, // $INCLUDE <filename>
    // $ORIGIN <domain-name>
    Origin,
}
462
#[cfg(test)]
mod tests {
    use super::*;

    /// An entry whose type field is not a known record type must be rejected, and the error
    /// text must contain the offending token in upper case.
    #[test]
    #[allow(clippy::uninlined_format_args)]
    fn test_zone_parse() {
        let domain = Name::from_str("parameter.origin.org.").unwrap();

        let zone_data = r#"$ORIGIN parsed.zone.origin.org.
 faulty-record-type 60 IN A 1.2.3.4
"#;

        let lexer = Lexer::new(zone_data);
        let result = Parser::new().parse(lexer, Some(domain));

        // The guard only inspects the error when there is one, so an unexpected `Ok` fails
        // through the assertion message below instead of panicking inside `unwrap_err`.
        assert!(
            matches!(&result, Err(e) if e.to_string().contains("FAULTY-RECORD-TYPE")),
            "unexpected result: {:#?}",
            result
        );
    }
}