tor_netdoc/doc/
microdesc.rs

1//! Parsing implementation for Tor microdescriptors.
2//!
3//! A "microdescriptor" is an incomplete, infrequently-changing
4//! summary of a relay's information that is generated by
5//! the directory authorities.
6//!
7//! Microdescriptors are much smaller than router descriptors, and
8//! change less frequently. For this reason, they're currently used
9//! for building circuits by all relays and clients.
10//!
11//! Microdescriptors can't be used on their own: you need to know
12//! which relay they are for, which requires a valid consensus
13//! directory.
14
15use crate::parse::keyword::Keyword;
16use crate::parse::parser::SectionRules;
17use crate::parse::tokenize::{ItemResult, NetDocReader};
18use crate::types::family::{RelayFamily, RelayFamilyId};
19use crate::types::misc::*;
20use crate::types::policy::PortPolicy;
21use crate::util;
22use crate::util::str::Extent;
23use crate::util::PeekableIterator;
24use crate::{AllowAnnotations, Error, NetdocErrorKind as EK, Result};
25use tor_error::internal;
26use tor_llcrypto::d;
27use tor_llcrypto::pk::{curve25519, ed25519, rsa};
28
29use digest::Digest;
30use once_cell::sync::Lazy;
31use std::str::FromStr as _;
32use std::sync::Arc;
33
34use std::time;
35
36#[cfg(feature = "build_docs")]
37mod build;
38
39#[cfg(feature = "build_docs")]
40pub use build::MicrodescBuilder;
41
/// Annotations prepended to a microdescriptor that has been stored to
/// disk.
///
/// The only annotation this module extracts is `@last-listed`.  The
/// [`Default`] value has no annotations set.
// NOTE(review): `dead_code` is presumably allowed because `last_listed`
// is only read by this module's tests -- confirm before removing.
#[allow(dead_code)]
#[derive(Clone, Debug, Default)]
pub struct MicrodescAnnotation {
    /// A time at which this microdescriptor was last listed in some
    /// consensus document.
    ///
    /// `None` if no `@last-listed` annotation was parsed.
    last_listed: Option<time::SystemTime>,
}
51
/// The digest of a microdescriptor as used in microdesc consensuses.
///
/// This is a 32-byte value: the SHA256 digest of the microdescriptor's
/// text.
pub type MdDigest = [u8; 32];
54
/// A single microdescriptor.
///
/// Values of this type are produced by [`Microdesc::parse`] or by
/// iterating over a [`MicrodescReader`].
#[allow(dead_code)]
#[cfg_attr(
    feature = "dangerous-expose-struct-fields",
    visible::StructFields(pub),
    non_exhaustive
)]
#[derive(Clone, Debug)]
pub struct Microdesc {
    /// The SHA256 digest of the text of this microdescriptor.  This
    /// value is used to identify the microdescriptor when downloading
    /// it, and when listing it in a consensus document.
    // TODO: maybe this belongs somewhere else. Once it's used to
    // correlate the microdesc to a consensus, it's never used again.
    #[cfg_attr(docsrs, doc(cfg(feature = "dangerous-expose-struct-fields")))]
    sha256: MdDigest,
    /// Public key used for the ntor circuit extension protocol.
    #[cfg_attr(docsrs, doc(cfg(feature = "dangerous-expose-struct-fields")))]
    ntor_onion_key: curve25519::PublicKey,
    /// Declared family for this relay.
    ///
    /// Empty if the microdescriptor had no `family` line.
    #[cfg_attr(docsrs, doc(cfg(feature = "dangerous-expose-struct-fields")))]
    family: Arc<RelayFamily>,
    /// List of IPv4 ports to which this relay will exit.
    ///
    /// Rejects every port if the microdescriptor had no `p` line.
    #[cfg_attr(docsrs, doc(cfg(feature = "dangerous-expose-struct-fields")))]
    ipv4_policy: Arc<PortPolicy>,
    /// List of IPv6 ports to which this relay will exit.
    ///
    /// Rejects every port if the microdescriptor had no `p6` line.
    #[cfg_attr(docsrs, doc(cfg(feature = "dangerous-expose-struct-fields")))]
    ipv6_policy: Arc<PortPolicy>,
    /// Ed25519 identity for this relay.
    ///
    /// Taken from the `id ed25519` line, which parsing requires.
    #[cfg_attr(docsrs, doc(cfg(feature = "dangerous-expose-struct-fields")))]
    ed25519_id: ed25519::Ed25519Identity,
    /// Family identities for this relay.
    ///
    /// Empty if the microdescriptor had no `family-ids` line.
    #[cfg_attr(docsrs, doc(cfg(feature = "dangerous-expose-struct-fields")))]
    family_ids: Vec<RelayFamilyId>,
    // addr is obsolete and doesn't go here any more
    // pr is obsolete and doesn't go here any more.
    // The legacy "tap" onion-key is obsolete, and though we parse it, we don't
    // save it.
}
94
95impl Microdesc {
96    /// Create a new MicrodescBuilder that can be used to construct
97    /// microdescriptors.
98    ///
99    /// This function is only available when the crate is built with the
100    /// `build_docs` feature.
101    ///
102    /// # Limitations
103    ///
104    /// The generated microdescriptors cannot yet be encoded, and do
105    /// not yet have correct sha256 digests. As such they are only
106    /// useful for testing.
107    #[cfg(feature = "build_docs")]
108    pub fn builder() -> MicrodescBuilder {
109        MicrodescBuilder::new()
110    }
111
112    /// Return the sha256 digest of this microdesc.
113    pub fn digest(&self) -> &MdDigest {
114        &self.sha256
115    }
116    /// Return the ntor onion key for this microdesc
117    pub fn ntor_key(&self) -> &curve25519::PublicKey {
118        &self.ntor_onion_key
119    }
120    /// Return the ipv4 exit policy for this microdesc
121    pub fn ipv4_policy(&self) -> &Arc<PortPolicy> {
122        &self.ipv4_policy
123    }
124    /// Return the ipv6 exit policy for this microdesc
125    pub fn ipv6_policy(&self) -> &Arc<PortPolicy> {
126        &self.ipv6_policy
127    }
128    /// Return the relay family for this microdesc
129    pub fn family(&self) -> &RelayFamily {
130        self.family.as_ref()
131    }
132    /// Return the ed25519 identity for this microdesc, if its
133    /// Ed25519 identity is well-formed.
134    pub fn ed25519_id(&self) -> &ed25519::Ed25519Identity {
135        &self.ed25519_id
136    }
137    /// Return a list of family ids for this microdesc.
138    pub fn family_ids(&self) -> &[RelayFamilyId] {
139        &self.family_ids[..]
140    }
141}
142
/// A microdescriptor annotated with additional data
///
/// TODO: rename this.
// NOTE(review): `dead_code` is presumably allowed because `ann` is only
// read by this module's tests -- confirm before removing.
#[allow(dead_code)]
#[derive(Clone, Debug)]
pub struct AnnotatedMicrodesc {
    /// The microdescriptor
    md: Microdesc,
    /// The annotations for the microdescriptor
    ann: MicrodescAnnotation,
    /// Where did we find the microdescriptor within the originally parsed
    /// string?
    ///
    /// `None` if no extent could be constructed for it.
    location: Option<Extent>,
}
157
158impl AnnotatedMicrodesc {
159    /// Consume this annotated microdesc and discard its annotations.
160    pub fn into_microdesc(self) -> Microdesc {
161        self.md
162    }
163
164    /// Return a reference to the microdescriptor within this annotated
165    /// microdescriptor.
166    pub fn md(&self) -> &Microdesc {
167        &self.md
168    }
169
170    /// If this Microdesc was parsed from `s`, return its original text.
171    pub fn within<'a>(&self, s: &'a str) -> Option<&'a str> {
172        self.location.as_ref().and_then(|ext| ext.reconstruct(s))
173    }
174}
175
// Declares `MicrodescKwd`, mapping each keyword string that this module
// understands to a named constant.  `@last-listed` is declared as an
// annotation; every other entry is an ordinary microdescriptor item.
//
// NOTE(review): `UNRECOGNIZED` and `ANN_UNRECOGNIZED` are used by the rule
// tables in this file but are not listed here; presumably `decl_keyword!`
// generates them -- confirm against the macro definition.
decl_keyword! {
    /// Keyword type for recognized objects in microdescriptors.
    MicrodescKwd {
        annotation "@last-listed" => ANN_LAST_LISTED,
        "onion-key" => ONION_KEY,
        "ntor-onion-key" => NTOR_ONION_KEY,
        "family" => FAMILY,
        "family-ids" => FAMILY_IDS,
        "p" => P,
        "p6" => P6,
        "id" => ID,
    }
}
189
/// Rules about annotations that can appear before a Microdescriptor
///
/// Built lazily on first use.
static MICRODESC_ANNOTATIONS: Lazy<SectionRules<MicrodescKwd>> = Lazy::new(|| {
    use MicrodescKwd::*;
    let mut rules = SectionRules::builder();
    // `@last-listed` takes at least one argument; its arguments are later
    // parsed together as an `Iso8601TimeSp`.
    rules.add(ANN_LAST_LISTED.rule().args(1..));
    // Any number of unrecognized annotations may appear, with or without
    // an object.
    rules.add(ANN_UNRECOGNIZED.rule().may_repeat().obj_optional());
    // unrecognized annotations are okay; anything else is a bug in this
    // context.
    rules.reject_unrecognized();
    rules.build()
});
/// Rules about entries that must appear in a Microdesc, and how they must
/// be formed.
///
/// Built lazily on first use.
static MICRODESC_RULES: Lazy<SectionRules<MicrodescKwd>> = Lazy::new(|| {
    use MicrodescKwd::*;

    let mut rules = SectionRules::builder();
    // Legacy (tap) onion key: the line is mandatory and takes no arguments,
    // but the key object itself may be omitted.
    rules.add(ONION_KEY.rule().required().no_args().obj_optional());
    // ntor onion key: mandatory; the key is read from the first argument.
    rules.add(NTOR_ONION_KEY.rule().required().args(1..));
    // Declared family: optional, but needs at least one argument if present.
    rules.add(FAMILY.rule().args(1..));
    // Family ids: optional, and may be present with no arguments at all.
    rules.add(FAMILY_IDS.rule().args(0..));
    // IPv4 and IPv6 exit policies: optional, at least two arguments each.
    rules.add(P.rule().args(2..));
    rules.add(P6.rule().args(2..));
    // Identity lines: may repeat; the parser looks for the one whose first
    // argument is "ed25519".
    rules.add(ID.rule().may_repeat().args(2..));
    // Unrecognized items are tolerated, with or without an object.
    rules.add(UNRECOGNIZED.rule().may_repeat().obj_optional());
    rules.build()
});
217
218impl MicrodescAnnotation {
219    /// Extract a (possibly empty) microdescriptor annotation from a
220    /// reader.
221    #[allow(dead_code)]
222    fn parse_from_reader(
223        reader: &mut NetDocReader<'_, MicrodescKwd>,
224    ) -> Result<MicrodescAnnotation> {
225        use MicrodescKwd::*;
226
227        let mut items = reader.pause_at(|item| item.is_ok_with_non_annotation());
228        let body = MICRODESC_ANNOTATIONS.parse(&mut items)?;
229
230        let last_listed = match body.get(ANN_LAST_LISTED) {
231            None => None,
232            Some(item) => Some(item.args_as_str().parse::<Iso8601TimeSp>()?.into()),
233        };
234
235        Ok(MicrodescAnnotation { last_listed })
236    }
237}
238
239impl Microdesc {
240    /// Parse a string into a new microdescriptor.
241    pub fn parse(s: &str) -> Result<Microdesc> {
242        let mut items = crate::parse::tokenize::NetDocReader::new(s)?;
243        let (result, _) = Self::parse_from_reader(&mut items).map_err(|e| e.within(s))?;
244        items.should_be_exhausted()?;
245        Ok(result)
246    }
247
248    /// Extract a single microdescriptor from a NetDocReader.
249    fn parse_from_reader(
250        reader: &mut NetDocReader<'_, MicrodescKwd>,
251    ) -> Result<(Microdesc, Option<Extent>)> {
252        use MicrodescKwd::*;
253        let s = reader.str();
254
255        let mut first_onion_key = true;
256        // We'll pause at the next annotation, or at the _second_ onion key.
257        let mut items = reader.pause_at(|item| match item {
258            Err(_) => false,
259            Ok(item) => {
260                item.kwd().is_annotation()
261                    || if item.kwd() == ONION_KEY {
262                        let was_first = first_onion_key;
263                        first_onion_key = false;
264                        !was_first
265                    } else {
266                        false
267                    }
268            }
269        });
270
271        let body = MICRODESC_RULES.parse(&mut items)?;
272
273        // We have to start with onion-key
274        let start_pos = {
275            // unwrap here is safe because parsing would have failed
276            // had there not been at least one item.
277            #[allow(clippy::unwrap_used)]
278            let first = body.first_item().unwrap();
279            if first.kwd() != ONION_KEY {
280                return Err(EK::WrongStartingToken
281                    .with_msg(first.kwd_str().to_string())
282                    .at_pos(first.pos()));
283            }
284            // Unwrap is safe here because we are parsing these strings from s
285            #[allow(clippy::unwrap_used)]
286            util::str::str_offset(s, first.kwd_str()).unwrap()
287        };
288
289        // Legacy (tap) onion key.  We parse this to make sure it's well-formed,
290        // but then we discard it immediately, since we never want to use it.
291        //
292        // In microdescriptors, the ONION_KEY field is mandatory, but its
293        // associated object is optional.
294        {
295            let tok = body.required(ONION_KEY)?;
296            if tok.has_obj() {
297                let _: rsa::PublicKey = tok
298                    .parse_obj::<RsaPublic>("RSA PUBLIC KEY")?
299                    .check_len_eq(1024)?
300                    .check_exponent(65537)?
301                    .into();
302            }
303        }
304
305        // Ntor onion key
306        let ntor_onion_key = body
307            .required(NTOR_ONION_KEY)?
308            .parse_arg::<Curve25519Public>(0)?
309            .into();
310
311        // family
312        //
313        // (We don't need to add the relay's own ID to this family, as we do in
314        // RouterDescs: the authorities already took care of that for us.)
315        let family = body
316            .maybe(FAMILY)
317            .parse_args_as_str::<RelayFamily>()?
318            .unwrap_or_else(RelayFamily::new)
319            .intern();
320
321        // Family ids (happy families case).
322        let family_ids = body
323            .maybe(FAMILY_IDS)
324            .args_as_str()
325            .unwrap_or("")
326            .split_ascii_whitespace()
327            .map(RelayFamilyId::from_str)
328            .collect::<Result<_>>()?;
329
330        // exit policies.
331        let ipv4_policy = body
332            .maybe(P)
333            .parse_args_as_str::<PortPolicy>()?
334            .unwrap_or_else(PortPolicy::new_reject_all);
335        let ipv6_policy = body
336            .maybe(P6)
337            .parse_args_as_str::<PortPolicy>()?
338            .unwrap_or_else(PortPolicy::new_reject_all);
339
340        // ed25519 identity
341        let ed25519_id = {
342            let id_tok = body
343                .slice(ID)
344                .iter()
345                .find(|item| item.arg(0) == Some("ed25519"));
346            match id_tok {
347                None => {
348                    return Err(EK::MissingToken.with_msg("id ed25519"));
349                }
350                Some(tok) => tok.parse_arg::<Ed25519Public>(1)?.into(),
351            }
352        };
353
354        let end_pos = {
355            // unwrap here is safe because parsing would have failed
356            // had there not been at least one item.
357            #[allow(clippy::unwrap_used)]
358            let last_item = body.last_item().unwrap();
359            last_item.offset_after(s).ok_or_else(|| {
360                Error::from(internal!("last item was not within source string"))
361                    .at_pos(last_item.end_pos())
362            })?
363        };
364
365        let text = &s[start_pos..end_pos];
366        let sha256 = d::Sha256::digest(text.as_bytes()).into();
367
368        let location = Extent::new(s, text);
369
370        let md = Microdesc {
371            sha256,
372            ntor_onion_key,
373            family,
374            ipv4_policy: ipv4_policy.intern(),
375            ipv6_policy: ipv6_policy.intern(),
376            ed25519_id,
377            family_ids,
378        };
379        Ok((md, location))
380    }
381}
382
383/// Consume tokens from 'reader' until the next token is the beginning
384/// of a microdescriptor: an annotation or an ONION_KEY.  If no such
385/// token exists, advance to the end of the reader.
386fn advance_to_next_microdesc(reader: &mut NetDocReader<'_, MicrodescKwd>, annotated: bool) {
387    use MicrodescKwd::*;
388    loop {
389        let item = reader.peek();
390        match item {
391            Some(Ok(t)) => {
392                let kwd = t.kwd();
393                if (annotated && kwd.is_annotation()) || kwd == ONION_KEY {
394                    return;
395                }
396            }
397            Some(Err(_)) => {
398                // We skip over broken tokens here.
399                //
400                // (This case can't happen in practice, since if there had been
401                // any error tokens, they would have been handled as part of
402                // handling the previous microdesc.)
403            }
404            None => {
405                return;
406            }
407        };
408        let _ = reader.next();
409    }
410}
411
/// An iterator that parses one or more (possibly annotated)
/// microdescriptors from a string.
///
/// Each item is a `Result`: after a microdescriptor fails to parse, the
/// reader skips ahead to the next plausible start and keeps going.
#[derive(Debug)]
pub struct MicrodescReader<'a> {
    /// True if we accept annotations; false otherwise.
    annotated: bool,
    /// An underlying reader to give us Items for the microdescriptors
    reader: NetDocReader<'a, MicrodescKwd>,
}
421
422impl<'a> MicrodescReader<'a> {
423    /// Construct a MicrodescReader to take microdescriptors from a string
424    /// 's'.
425    pub fn new(s: &'a str, allow: &AllowAnnotations) -> Result<Self> {
426        let reader = NetDocReader::new(s)?;
427        let annotated = allow == &AllowAnnotations::AnnotationsAllowed;
428        Ok(MicrodescReader { annotated, reader })
429    }
430
431    /// If we're annotated, parse an annotation from the reader. Otherwise
432    /// return a default annotation.
433    fn take_annotation(&mut self) -> Result<MicrodescAnnotation> {
434        if self.annotated {
435            MicrodescAnnotation::parse_from_reader(&mut self.reader)
436        } else {
437            Ok(MicrodescAnnotation::default())
438        }
439    }
440
441    /// Parse a (possibly annotated) microdescriptor from the reader.
442    ///
443    /// On error, parsing stops after the first failure.
444    fn take_annotated_microdesc_raw(&mut self) -> Result<AnnotatedMicrodesc> {
445        let ann = self.take_annotation()?;
446        let (md, location) = Microdesc::parse_from_reader(&mut self.reader)?;
447        Ok(AnnotatedMicrodesc { md, ann, location })
448    }
449
450    /// Parse a (possibly annotated) microdescriptor from the reader.
451    ///
452    /// On error, advance the reader to the start of the next microdescriptor.
453    fn take_annotated_microdesc(&mut self) -> Result<AnnotatedMicrodesc> {
454        let pos_orig = self.reader.pos();
455        let result = self.take_annotated_microdesc_raw();
456        if result.is_err() {
457            if self.reader.pos() == pos_orig {
458                // No tokens were consumed from the reader.  We need to
459                // drop at least one token to ensure we aren't looping.
460                //
461                // (This might not be able to happen, but it's easier to
462                // explicitly catch this case than it is to prove that
463                // it's impossible.)
464                let _ = self.reader.next();
465            }
466            advance_to_next_microdesc(&mut self.reader, self.annotated);
467        }
468        result
469    }
470}
471
472impl<'a> Iterator for MicrodescReader<'a> {
473    type Item = Result<AnnotatedMicrodesc>;
474    fn next(&mut self) -> Option<Self::Item> {
475        // If there is no next token, we're at the end.
476        self.reader.peek()?;
477
478        Some(
479            self.take_annotated_microdesc()
480                .map_err(|e| e.within(self.reader.str())),
481        )
482    }
483}
484
#[cfg(test)]
mod test {
    // @@ begin test lint list maintained by maint/add_warning @@
    #![allow(clippy::bool_assert_comparison)]
    #![allow(clippy::clone_on_copy)]
    #![allow(clippy::dbg_macro)]
    #![allow(clippy::mixed_attributes_style)]
    #![allow(clippy::print_stderr)]
    #![allow(clippy::print_stdout)]
    #![allow(clippy::single_char_pattern)]
    #![allow(clippy::unwrap_used)]
    #![allow(clippy::unchecked_duration_subtraction)]
    #![allow(clippy::useless_vec)]
    #![allow(clippy::needless_pass_by_value)]
    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
    use super::*;
    use hex_literal::hex;
    // Well-formed inputs, embedded at compile time.
    const TESTDATA: &str = include_str!("../../testdata/microdesc1.txt");
    const TESTDATA2: &str = include_str!("../../testdata/microdesc2.txt");
    const TESTDATA3: &str = include_str!("../../testdata/microdesc3.txt");
    const TESTDATA4: &str = include_str!("../../testdata/microdesc4.txt");

    /// Load the file `fname` from this crate's `testdata/bad-mds`
    /// directory as a string, panicking if it cannot be read.
    fn read_bad(fname: &str) -> String {
        use std::fs;
        use std::path::PathBuf;
        let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        path.push("testdata");
        path.push("bad-mds");
        path.push(fname);

        fs::read_to_string(path).unwrap()
    }

    /// `Microdesc::parse` accepts a single microdescriptor.
    #[test]
    fn parse_single() -> Result<()> {
        let _md = Microdesc::parse(TESTDATA)?;
        Ok(())
    }

    /// `Microdesc::parse` accepts `microdesc3.txt`.
    // NOTE(review): judging by the test name, that file presumably has an
    // `onion-key` line without a TAP key object -- confirm against the data.
    #[test]
    fn parse_no_tap_key() -> Result<()> {
        let _md = Microdesc::parse(TESTDATA3)?;
        Ok(())
    }

    /// Read several annotated microdescriptors and check every parsed
    /// field of the first one.
    #[test]
    fn parse_multi() -> Result<()> {
        use humantime::parse_rfc3339;
        let mds: Result<Vec<_>> =
            MicrodescReader::new(TESTDATA2, &AllowAnnotations::AnnotationsAllowed)?.collect();
        let mds = mds?;
        assert_eq!(mds.len(), 4);

        assert_eq!(
            mds[0].ann.last_listed.unwrap(),
            parse_rfc3339("2020-01-27T18:52:09Z").unwrap()
        );
        assert_eq!(
            mds[0].md().digest(),
            &hex!("38c71329a87098cb341c46c9c62bd646622b4445f7eb985a0e6adb23a22ccf4f")
        );
        assert_eq!(
            mds[0].md().ntor_key().as_bytes(),
            &hex!("5e895d65304a3a1894616660143f7af5757fe08bc18045c7855ee8debb9e6c47")
        );
        assert!(mds[0].md().ipv4_policy().allows_port(993));
        assert!(mds[0].md().ipv6_policy().allows_port(993));
        assert!(!mds[0].md().ipv4_policy().allows_port(25));
        assert!(!mds[0].md().ipv6_policy().allows_port(25));
        assert_eq!(
            mds[0].md().ed25519_id().as_bytes(),
            &hex!("2d85fdc88e6c1bcfb46897fca1dba6d1354f93261d68a79e0b5bc170dd923084")
        );

        Ok(())
    }

    /// Check `family-ids` handling: the first microdescriptor yields no
    /// ids, the second yields two.
    #[test]
    fn parse_family_ids() -> Result<()> {
        let mds: Vec<AnnotatedMicrodesc> =
            MicrodescReader::new(TESTDATA4, &AllowAnnotations::AnnotationsNotAllowed)?
                .collect::<Result<_>>()?;
        assert_eq!(mds.len(), 2);
        let md0 = mds[0].md();
        let md1 = mds[1].md();
        assert_eq!(md0.family_ids().len(), 0);
        assert_eq!(
            md1.family_ids(),
            &[
                "ed25519:dXMgdGhlIHRyaXVtcGguICAgIC1UaG9tYXMgUGFpbmU"
                    .parse()
                    .unwrap(),
                "other:Example".parse().unwrap()
            ]
        );
        assert!(matches!(md1.family_ids()[0], RelayFamilyId::Ed25519(_)));

        Ok(())
    }

    /// Each malformed input must fail with exactly the expected error.
    #[test]
    fn test_bad() {
        use crate::types::policy::PolicyError;
        use crate::Pos;
        /// Parse the bad input `fname` and require that it fails with `e`.
        fn check(fname: &str, e: &Error) {
            let content = read_bad(fname);
            let res = Microdesc::parse(&content);
            assert!(res.is_err());
            assert_eq!(&res.err().unwrap(), e);
        }

        check(
            "wrong-start",
            &EK::WrongStartingToken
                .with_msg("family")
                .at_pos(Pos::from_line(1, 1)),
        );
        check(
            "bogus-policy",
            &EK::BadPolicy
                .at_pos(Pos::from_line(9, 1))
                .with_source(PolicyError::InvalidPort),
        );
        check("wrong-id", &EK::MissingToken.with_msg("id ed25519"));
    }

    /// A good microdescriptor between two bad ones is still parsed: the
    /// reader yields one result per input, in order.
    #[test]
    fn test_recover() -> Result<()> {
        let mut data = read_bad("wrong-start");
        data += TESTDATA;
        data += &read_bad("wrong-id");

        let res: Vec<Result<_>> =
            MicrodescReader::new(&data, &AllowAnnotations::AnnotationsAllowed)?.collect();

        assert_eq!(res.len(), 3);
        assert!(res[0].is_err());
        assert!(res[1].is_ok());
        assert!(res[2].is_err());
        Ok(())
    }
}