tor_netdoc/doc/
microdesc.rs

1//! Parsing implementation for Tor microdescriptors.
2//!
3//! A "microdescriptor" is an incomplete, infrequently-changing
4//! summary of a relay's information that is generated by
5//! the directory authorities.
6//!
7//! Microdescriptors are much smaller than router descriptors, and
8//! change less frequently. For this reason, they're currently used
9//! for building circuits by all relays and clients.
10//!
11//! Microdescriptors can't be used on their own: you need to know
12//! which relay they are for, which requires a valid consensus
13//! directory.
14
15use crate::parse::keyword::Keyword;
16use crate::parse::parser::SectionRules;
17use crate::parse::tokenize::{ItemResult, NetDocReader};
18use crate::types::family::{RelayFamily, RelayFamilyId};
19use crate::types::misc::*;
20use crate::types::policy::PortPolicy;
21use crate::util;
22use crate::util::PeekableIterator;
23use crate::util::str::Extent;
24use crate::{AllowAnnotations, Error, NetdocErrorKind as EK, Result};
25use tor_error::internal;
26use tor_llcrypto::d;
27use tor_llcrypto::pk::{curve25519, ed25519, rsa};
28
29use digest::Digest;
30use std::str::FromStr as _;
31use std::sync::Arc;
32use std::sync::LazyLock;
33
34use std::time;
35
36#[cfg(feature = "build_docs")]
37mod build;
38
39#[cfg(feature = "build_docs")]
40pub use build::MicrodescBuilder;
41
/// Length, in bytes, of a microdescriptor digest.
///
/// This is the array length of [`MdDigest`].
pub const DOC_DIGEST_LEN: usize = 32;
44
/// Annotations that are prepended to a microdescriptor once it has been
/// stored to disk.
///
/// The `Default` value carries no annotation data.
// NOTE(review): within this file `last_listed` is only read by the unit
// tests, which is presumably why `dead_code` is allowed here.
#[allow(dead_code)]
#[derive(Debug, Clone, Default)]
pub struct MicrodescAnnotation {
    /// The time at which this microdescriptor was last listed in some
    /// consensus document, if an `@last-listed` annotation was present.
    last_listed: Option<time::SystemTime>,
}
54
/// The digest of a microdescriptor, as used in microdesc consensuses.
///
/// This is a fixed-size array of [`DOC_DIGEST_LEN`] bytes.
pub type MdDigest = [u8; DOC_DIGEST_LEN];
57
58/// A single microdescriptor.
59#[derive(Clone, Debug)]
60#[non_exhaustive]
61pub struct Microdesc {
62    /// The SHA256 digest of the text of this microdescriptor.  This
63    /// value is used to identify the microdescriptor when downloading
64    /// it, and when listing it in a consensus document.
65    // TODO: maybe this belongs somewhere else. Once it's used to store
66    // correlate the microdesc to a consensus, it's never used again.
67    pub sha256: MdDigest,
68    /// Public key used for the ntor circuit extension protocol.
69    pub ntor_onion_key: curve25519::PublicKey,
70    /// Declared family for this relay.
71    pub family: Arc<RelayFamily>,
72    /// List of IPv4 ports to which this relay will exit
73    pub ipv4_policy: Arc<PortPolicy>,
74    /// List of IPv6 ports to which this relay will exit
75    pub ipv6_policy: Arc<PortPolicy>,
76    /// Ed25519 identity for this relay
77    pub ed25519_id: ed25519::Ed25519Identity,
78    /// Family identities for this relay.
79    pub family_ids: Vec<RelayFamilyId>,
80    // addr is obsolete and doesn't go here any more
81    // pr is obsolete and doesn't go here any more.
82    // The legacy "tap" onion-key is obsolete, and though we parse it, we don't
83    // save it.
84}
85
86impl Microdesc {
87    /// Create a new MicrodescBuilder that can be used to construct
88    /// microdescriptors.
89    ///
90    /// This function is only available when the crate is built with the
91    /// `build_docs` feature.
92    ///
93    /// # Limitations
94    ///
95    /// The generated microdescriptors cannot yet be encoded, and do
96    /// not yet have correct sha256 digests. As such they are only
97    /// useful for testing.
98    #[cfg(feature = "build_docs")]
99    pub fn builder() -> MicrodescBuilder {
100        MicrodescBuilder::new()
101    }
102
103    /// Return the sha256 digest of this microdesc.
104    pub fn digest(&self) -> &MdDigest {
105        &self.sha256
106    }
107    /// Return the ntor onion key for this microdesc
108    pub fn ntor_key(&self) -> &curve25519::PublicKey {
109        &self.ntor_onion_key
110    }
111    /// Return the ipv4 exit policy for this microdesc
112    pub fn ipv4_policy(&self) -> &Arc<PortPolicy> {
113        &self.ipv4_policy
114    }
115    /// Return the ipv6 exit policy for this microdesc
116    pub fn ipv6_policy(&self) -> &Arc<PortPolicy> {
117        &self.ipv6_policy
118    }
119    /// Return the relay family for this microdesc
120    pub fn family(&self) -> &RelayFamily {
121        self.family.as_ref()
122    }
123    /// Return the ed25519 identity for this microdesc, if its
124    /// Ed25519 identity is well-formed.
125    pub fn ed25519_id(&self) -> &ed25519::Ed25519Identity {
126        &self.ed25519_id
127    }
128    /// Return a list of family ids for this microdesc.
129    pub fn family_ids(&self) -> &[RelayFamilyId] {
130        &self.family_ids[..]
131    }
132}
133
134/// A microdescriptor annotated with additional data
135///
136/// TODO: rename this.
137#[allow(dead_code)]
138#[derive(Clone, Debug)]
139pub struct AnnotatedMicrodesc {
140    /// The microdescriptor
141    md: Microdesc,
142    /// The annotations for the microdescriptor
143    ann: MicrodescAnnotation,
144    /// Where did we find the microdescriptor with the originally parsed
145    /// string?
146    location: Option<Extent>,
147}
148
149impl AnnotatedMicrodesc {
150    /// Consume this annotated microdesc and discard its annotations.
151    pub fn into_microdesc(self) -> Microdesc {
152        self.md
153    }
154
155    /// Return a reference to the microdescriptor within this annotated
156    /// microdescriptor.
157    pub fn md(&self) -> &Microdesc {
158        &self.md
159    }
160
161    /// If this Microdesc was parsed from `s`, return its original text.
162    pub fn within<'a>(&self, s: &'a str) -> Option<&'a str> {
163        self.location.as_ref().and_then(|ext| ext.reconstruct(s))
164    }
165}
166
decl_keyword! {
    /// Keyword type for the items recognized in microdescriptors.
    ///
    /// `@last-listed` is the only annotation keyword declared here; all
    /// other entries are keywords of the microdescriptor body.
    MicrodescKwd {
        annotation "@last-listed" => ANN_LAST_LISTED,
        "onion-key" => ONION_KEY,
        "ntor-onion-key" => NTOR_ONION_KEY,
        "family" => FAMILY,
        "family-ids" => FAMILY_IDS,
        "p" => P,
        "p6" => P6,
        "id" => ID,
    }
}
180
181/// Rules about annotations that can appear before a Microdescriptor
182static MICRODESC_ANNOTATIONS: LazyLock<SectionRules<MicrodescKwd>> = LazyLock::new(|| {
183    use MicrodescKwd::*;
184    let mut rules = SectionRules::builder();
185    rules.add(ANN_LAST_LISTED.rule().args(1..));
186    rules.add(ANN_UNRECOGNIZED.rule().may_repeat().obj_optional());
187    // unrecognized annotations are okay; anything else is a bug in this
188    // context.
189    rules.reject_unrecognized();
190    rules.build()
191});
192/// Rules about entries that must appear in an Microdesc, and how they must
193/// be formed.
194static MICRODESC_RULES: LazyLock<SectionRules<MicrodescKwd>> = LazyLock::new(|| {
195    use MicrodescKwd::*;
196
197    let mut rules = SectionRules::builder();
198    rules.add(ONION_KEY.rule().required().no_args().obj_optional());
199    rules.add(NTOR_ONION_KEY.rule().required().args(1..));
200    rules.add(FAMILY.rule().args(1..));
201    rules.add(FAMILY_IDS.rule().args(0..));
202    rules.add(P.rule().args(2..));
203    rules.add(P6.rule().args(2..));
204    rules.add(ID.rule().may_repeat().args(2..));
205    rules.add(UNRECOGNIZED.rule().may_repeat().obj_optional());
206    rules.build()
207});
208
209impl MicrodescAnnotation {
210    /// Extract a (possibly empty) microdescriptor annotation from a
211    /// reader.
212    #[allow(dead_code)]
213    fn parse_from_reader(
214        reader: &mut NetDocReader<'_, MicrodescKwd>,
215    ) -> Result<MicrodescAnnotation> {
216        use MicrodescKwd::*;
217
218        let mut items = reader.pause_at(|item| item.is_ok_with_non_annotation());
219        let body = MICRODESC_ANNOTATIONS.parse(&mut items)?;
220
221        let last_listed = match body.get(ANN_LAST_LISTED) {
222            None => None,
223            Some(item) => Some(item.args_as_str().parse::<Iso8601TimeSp>()?.into()),
224        };
225
226        Ok(MicrodescAnnotation { last_listed })
227    }
228}
229
230impl Microdesc {
231    /// Parse a string into a new microdescriptor.
232    pub fn parse(s: &str) -> Result<Microdesc> {
233        let mut items = crate::parse::tokenize::NetDocReader::new(s)?;
234        let (result, _) = Self::parse_from_reader(&mut items).map_err(|e| e.within(s))?;
235        items.should_be_exhausted()?;
236        Ok(result)
237    }
238
239    /// Extract a single microdescriptor from a NetDocReader.
240    fn parse_from_reader(
241        reader: &mut NetDocReader<'_, MicrodescKwd>,
242    ) -> Result<(Microdesc, Option<Extent>)> {
243        use MicrodescKwd::*;
244        let s = reader.str();
245
246        let mut first_onion_key = true;
247        // We'll pause at the next annotation, or at the _second_ onion key.
248        let mut items = reader.pause_at(|item| match item {
249            Err(_) => false,
250            Ok(item) => {
251                item.kwd().is_annotation()
252                    || if item.kwd() == ONION_KEY {
253                        let was_first = first_onion_key;
254                        first_onion_key = false;
255                        !was_first
256                    } else {
257                        false
258                    }
259            }
260        });
261
262        let body = MICRODESC_RULES.parse(&mut items)?;
263
264        // We have to start with onion-key
265        let start_pos = {
266            // unwrap here is safe because parsing would have failed
267            // had there not been at least one item.
268            #[allow(clippy::unwrap_used)]
269            let first = body.first_item().unwrap();
270            if first.kwd() != ONION_KEY {
271                return Err(EK::WrongStartingToken
272                    .with_msg(first.kwd_str().to_string())
273                    .at_pos(first.pos()));
274            }
275            // Unwrap is safe here because we are parsing these strings from s
276            #[allow(clippy::unwrap_used)]
277            util::str::str_offset(s, first.kwd_str()).unwrap()
278        };
279
280        // Legacy (tap) onion key.  We parse this to make sure it's well-formed,
281        // but then we discard it immediately, since we never want to use it.
282        //
283        // In microdescriptors, the ONION_KEY field is mandatory, but its
284        // associated object is optional.
285        {
286            let tok = body.required(ONION_KEY)?;
287            if tok.has_obj() {
288                let _: rsa::PublicKey = tok
289                    .parse_obj::<RsaPublic>("RSA PUBLIC KEY")?
290                    .check_len_eq(1024)?
291                    .check_exponent(65537)?
292                    .into();
293            }
294        }
295
296        // Ntor onion key
297        let ntor_onion_key = body
298            .required(NTOR_ONION_KEY)?
299            .parse_arg::<Curve25519Public>(0)?
300            .into();
301
302        // family
303        //
304        // (We don't need to add the relay's own ID to this family, as we do in
305        // RouterDescs: the authorities already took care of that for us.)
306        let family = body
307            .maybe(FAMILY)
308            .parse_args_as_str::<RelayFamily>()?
309            .unwrap_or_else(RelayFamily::new)
310            .intern();
311
312        // Family ids (happy families case).
313        let family_ids = body
314            .maybe(FAMILY_IDS)
315            .args_as_str()
316            .unwrap_or("")
317            .split_ascii_whitespace()
318            .map(RelayFamilyId::from_str)
319            .collect::<Result<_>>()?;
320
321        // exit policies.
322        let ipv4_policy = body
323            .maybe(P)
324            .parse_args_as_str::<PortPolicy>()?
325            .unwrap_or_else(PortPolicy::new_reject_all);
326        let ipv6_policy = body
327            .maybe(P6)
328            .parse_args_as_str::<PortPolicy>()?
329            .unwrap_or_else(PortPolicy::new_reject_all);
330
331        // ed25519 identity
332        let ed25519_id = {
333            let id_tok = body
334                .slice(ID)
335                .iter()
336                .find(|item| item.arg(0) == Some("ed25519"));
337            match id_tok {
338                None => {
339                    return Err(EK::MissingToken.with_msg("id ed25519"));
340                }
341                Some(tok) => tok.parse_arg::<Ed25519Public>(1)?.into(),
342            }
343        };
344
345        let end_pos = {
346            // unwrap here is safe because parsing would have failed
347            // had there not been at least one item.
348            #[allow(clippy::unwrap_used)]
349            let last_item = body.last_item().unwrap();
350            last_item.offset_after(s).ok_or_else(|| {
351                Error::from(internal!("last item was not within source string"))
352                    .at_pos(last_item.end_pos())
353            })?
354        };
355
356        let text = &s[start_pos..end_pos];
357        let sha256 = d::Sha256::digest(text.as_bytes()).into();
358
359        let location = Extent::new(s, text);
360
361        let md = Microdesc {
362            sha256,
363            ntor_onion_key,
364            family,
365            ipv4_policy: ipv4_policy.intern(),
366            ipv6_policy: ipv6_policy.intern(),
367            ed25519_id,
368            family_ids,
369        };
370        Ok((md, location))
371    }
372}
373
374/// Consume tokens from 'reader' until the next token is the beginning
375/// of a microdescriptor: an annotation or an ONION_KEY.  If no such
376/// token exists, advance to the end of the reader.
377fn advance_to_next_microdesc(reader: &mut NetDocReader<'_, MicrodescKwd>, annotated: bool) {
378    use MicrodescKwd::*;
379    loop {
380        let item = reader.peek();
381        match item {
382            Some(Ok(t)) => {
383                let kwd = t.kwd();
384                if (annotated && kwd.is_annotation()) || kwd == ONION_KEY {
385                    return;
386                }
387            }
388            Some(Err(_)) => {
389                // We skip over broken tokens here.
390                //
391                // (This case can't happen in practice, since if there had been
392                // any error tokens, they would have been handled as part of
393                // handling the previous microdesc.)
394            }
395            None => {
396                return;
397            }
398        };
399        let _ = reader.next();
400    }
401}
402
/// An iterator that parses one or more (possibly annotated)
/// microdescriptors from a string.
///
/// Each item is a `Result`: a microdescriptor that fails to parse is
/// yielded as an error, and iteration then continues from the start of
/// the next microdescriptor.
#[derive(Debug)]
pub struct MicrodescReader<'a> {
    /// True if we accept annotations; false otherwise.
    annotated: bool,
    /// The underlying reader that supplies the items making up each
    /// microdescriptor.
    reader: NetDocReader<'a, MicrodescKwd>,
}
412
413impl<'a> MicrodescReader<'a> {
414    /// Construct a MicrodescReader to take microdescriptors from a string
415    /// 's'.
416    pub fn new(s: &'a str, allow: &AllowAnnotations) -> Result<Self> {
417        let reader = NetDocReader::new(s)?;
418        let annotated = allow == &AllowAnnotations::AnnotationsAllowed;
419        Ok(MicrodescReader { annotated, reader })
420    }
421
422    /// If we're annotated, parse an annotation from the reader. Otherwise
423    /// return a default annotation.
424    fn take_annotation(&mut self) -> Result<MicrodescAnnotation> {
425        if self.annotated {
426            MicrodescAnnotation::parse_from_reader(&mut self.reader)
427        } else {
428            Ok(MicrodescAnnotation::default())
429        }
430    }
431
432    /// Parse a (possibly annotated) microdescriptor from the reader.
433    ///
434    /// On error, parsing stops after the first failure.
435    fn take_annotated_microdesc_raw(&mut self) -> Result<AnnotatedMicrodesc> {
436        let ann = self.take_annotation()?;
437        let (md, location) = Microdesc::parse_from_reader(&mut self.reader)?;
438        Ok(AnnotatedMicrodesc { md, ann, location })
439    }
440
441    /// Parse a (possibly annotated) microdescriptor from the reader.
442    ///
443    /// On error, advance the reader to the start of the next microdescriptor.
444    fn take_annotated_microdesc(&mut self) -> Result<AnnotatedMicrodesc> {
445        let pos_orig = self.reader.pos();
446        let result = self.take_annotated_microdesc_raw();
447        if result.is_err() {
448            if self.reader.pos() == pos_orig {
449                // No tokens were consumed from the reader.  We need to
450                // drop at least one token to ensure we aren't looping.
451                //
452                // (This might not be able to happen, but it's easier to
453                // explicitly catch this case than it is to prove that
454                // it's impossible.)
455                let _ = self.reader.next();
456            }
457            advance_to_next_microdesc(&mut self.reader, self.annotated);
458        }
459        result
460    }
461}
462
463impl<'a> Iterator for MicrodescReader<'a> {
464    type Item = Result<AnnotatedMicrodesc>;
465    fn next(&mut self) -> Option<Self::Item> {
466        // If there is no next token, we're at the end.
467        self.reader.peek()?;
468
469        Some(
470            self.take_annotated_microdesc()
471                .map_err(|e| e.within(self.reader.str())),
472        )
473    }
474}
475
#[cfg(test)]
mod test {
    // @@ begin test lint list maintained by maint/add_warning @@
    #![allow(clippy::bool_assert_comparison)]
    #![allow(clippy::clone_on_copy)]
    #![allow(clippy::dbg_macro)]
    #![allow(clippy::mixed_attributes_style)]
    #![allow(clippy::print_stderr)]
    #![allow(clippy::print_stdout)]
    #![allow(clippy::single_char_pattern)]
    #![allow(clippy::unwrap_used)]
    #![allow(clippy::unchecked_duration_subtraction)]
    #![allow(clippy::useless_vec)]
    #![allow(clippy::needless_pass_by_value)]
    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
    use super::*;
    use hex_literal::hex;
    const TESTDATA: &str = include_str!("../../testdata/microdesc1.txt");
    const TESTDATA2: &str = include_str!("../../testdata/microdesc2.txt");
    const TESTDATA3: &str = include_str!("../../testdata/microdesc3.txt");
    const TESTDATA4: &str = include_str!("../../testdata/microdesc4.txt");

    /// Load the deliberately malformed test file `fname` from the
    /// `testdata/bad-mds` directory of this crate.
    fn read_bad(fname: &str) -> String {
        use std::fs;
        use std::path::PathBuf;
        let path: PathBuf = [env!("CARGO_MANIFEST_DIR"), "testdata", "bad-mds", fname]
            .iter()
            .collect();

        fs::read_to_string(path).unwrap()
    }

    #[test]
    fn parse_single() -> Result<()> {
        Microdesc::parse(TESTDATA).map(|_md| ())
    }

    #[test]
    fn parse_no_tap_key() -> Result<()> {
        Microdesc::parse(TESTDATA3).map(|_md| ())
    }

    #[test]
    fn parse_multi() -> Result<()> {
        use humantime::parse_rfc3339;
        let mds = MicrodescReader::new(TESTDATA2, &AllowAnnotations::AnnotationsAllowed)?
            .collect::<Result<Vec<_>>>()?;
        assert_eq!(mds.len(), 4);

        // Spot-check every parsed field of the first entry.
        let first = &mds[0];
        let md = first.md();

        assert_eq!(
            first.ann.last_listed.unwrap(),
            parse_rfc3339("2020-01-27T18:52:09Z").unwrap()
        );
        assert_eq!(
            md.digest(),
            &hex!("38c71329a87098cb341c46c9c62bd646622b4445f7eb985a0e6adb23a22ccf4f")
        );
        assert_eq!(
            md.ntor_key().as_bytes(),
            &hex!("5e895d65304a3a1894616660143f7af5757fe08bc18045c7855ee8debb9e6c47")
        );
        assert!(md.ipv4_policy().allows_port(993));
        assert!(md.ipv6_policy().allows_port(993));
        assert!(!md.ipv4_policy().allows_port(25));
        assert!(!md.ipv6_policy().allows_port(25));
        assert_eq!(
            md.ed25519_id().as_bytes(),
            &hex!("2d85fdc88e6c1bcfb46897fca1dba6d1354f93261d68a79e0b5bc170dd923084")
        );

        Ok(())
    }

    #[test]
    fn parse_family_ids() -> Result<()> {
        let reader = MicrodescReader::new(TESTDATA4, &AllowAnnotations::AnnotationsNotAllowed)?;
        let mds: Vec<AnnotatedMicrodesc> = reader.collect::<Result<_>>()?;
        assert_eq!(mds.len(), 2);

        let without_ids = mds[0].md();
        let with_ids = mds[1].md();
        assert!(without_ids.family_ids().is_empty());
        assert_eq!(
            with_ids.family_ids(),
            &[
                "ed25519:dXMgdGhlIHRyaXVtcGguICAgIC1UaG9tYXMgUGFpbmU"
                    .parse()
                    .unwrap(),
                "other:Example".parse().unwrap()
            ]
        );
        assert!(matches!(
            with_ids.family_ids()[0],
            RelayFamilyId::Ed25519(_)
        ));

        Ok(())
    }

    #[test]
    fn test_bad() {
        use crate::Pos;
        use crate::types::policy::PolicyError;

        /// Assert that parsing the bad file `fname` fails with exactly
        /// the error `e`.
        fn check(fname: &str, e: &Error) {
            let content = read_bad(fname);
            let err = Microdesc::parse(&content).unwrap_err();
            assert_eq!(&err, e);
        }

        let wrong_start = EK::WrongStartingToken
            .with_msg("family")
            .at_pos(Pos::from_line(1, 1));
        check("wrong-start", &wrong_start);

        let bogus_policy = EK::BadPolicy
            .at_pos(Pos::from_line(9, 1))
            .with_source(PolicyError::InvalidPort);
        check("bogus-policy", &bogus_policy);

        let wrong_id = EK::MissingToken.with_msg("id ed25519");
        check("wrong-id", &wrong_id);
    }

    #[test]
    fn test_recover() -> Result<()> {
        // One good microdescriptor sandwiched between two bad ones.
        let data = format!(
            "{}{}{}",
            read_bad("wrong-start"),
            TESTDATA,
            read_bad("wrong-id")
        );

        let res: Vec<Result<_>> =
            MicrodescReader::new(&data, &AllowAnnotations::AnnotationsAllowed)?.collect();

        assert_eq!(res.len(), 3);
        assert!(res[0].is_err());
        assert!(res[1].is_ok());
        assert!(res[2].is_err());
        Ok(())
    }
}