tor_netdoc/doc/
microdesc.rs

1//! Parsing implementation for Tor microdescriptors.
2//!
3//! A "microdescriptor" is an incomplete, infrequently-changing
4//! summary of a relay's information that is generated by
5//! the directory authorities.
6//!
7//! Microdescriptors are much smaller than router descriptors, and
8//! change less frequently. For this reason, they're currently used
9//! for building circuits by all relays and clients.
10//!
11//! Microdescriptors can't be used on their own: you need to know
12//! which relay they are for, which requires a valid consensus
13//! directory.
14
15use crate::parse::keyword::Keyword;
16use crate::parse::parser::SectionRules;
17use crate::parse::tokenize::{ItemResult, NetDocReader};
18use crate::types::family::{RelayFamily, RelayFamilyId};
19use crate::types::misc::*;
20use crate::types::policy::PortPolicy;
21use crate::util;
22use crate::util::PeekableIterator;
23use crate::util::str::Extent;
24use crate::{AllowAnnotations, Error, NetdocErrorKind as EK, Result};
25use tor_error::internal;
26use tor_llcrypto::d;
27use tor_llcrypto::pk::{curve25519, ed25519, rsa};
28
29use digest::Digest;
30use std::str::FromStr as _;
31use std::sync::Arc;
32use std::sync::LazyLock;
33
34use std::time;
35
36#[cfg(feature = "build_docs")]
37mod build;
38
39#[cfg(feature = "build_docs")]
40pub use build::MicrodescBuilder;
41
/// Length of a router microdescriptor digest, in bytes.
///
/// This is the size of the array type [`MdDigest`].
pub const DOC_DIGEST_LEN: usize = 32;
44
/// Annotations prepended to a microdescriptor that has been stored to
/// disk.
///
/// The only annotation with a dedicated keyword in this module is
/// `@last-listed`.
#[allow(dead_code)]
#[derive(Clone, Debug, Default)]
pub struct MicrodescAnnotation {
    /// A time at which this microdescriptor was last listed in some
    /// consensus document.
    ///
    /// This is `None` when no `@last-listed` annotation was found.
    last_listed: Option<time::SystemTime>,
}
54
/// The digest of a microdescriptor as used in microdesc consensuses.
///
/// The parser in this module fills it with a SHA256 digest of the
/// microdescriptor's text; it is [`DOC_DIGEST_LEN`] bytes long.
pub type MdDigest = [u8; DOC_DIGEST_LEN];
57
/// A single microdescriptor.
///
/// Values of this type are produced by [`Microdesc::parse`] or by iterating
/// over a [`MicrodescReader`].  The fields are private unless the
/// `dangerous-expose-struct-fields` feature is enabled; use the accessor
/// methods instead.
#[allow(dead_code)]
#[cfg_attr(
    feature = "dangerous-expose-struct-fields",
    visible::StructFields(pub),
    non_exhaustive
)]
#[derive(Clone, Debug)]
pub struct Microdesc {
    /// The SHA256 digest of the text of this microdescriptor.  This
    /// value is used to identify the microdescriptor when downloading
    /// it, and when listing it in a consensus document.
    // TODO: maybe this belongs somewhere else. Once it's used to
    // correlate the microdesc to a consensus, it's never used again.
    #[cfg_attr(docsrs, doc(cfg(feature = "dangerous-expose-struct-fields")))]
    sha256: MdDigest,
    /// Public key used for the ntor circuit extension protocol.
    #[cfg_attr(docsrs, doc(cfg(feature = "dangerous-expose-struct-fields")))]
    ntor_onion_key: curve25519::PublicKey,
    /// Declared family for this relay.
    ///
    /// Empty when the microdescriptor has no `family` line.
    #[cfg_attr(docsrs, doc(cfg(feature = "dangerous-expose-struct-fields")))]
    family: Arc<RelayFamily>,
    /// List of IPv4 ports to which this relay will exit.
    ///
    /// Defaults to "reject all" when the microdescriptor has no `p` line.
    #[cfg_attr(docsrs, doc(cfg(feature = "dangerous-expose-struct-fields")))]
    ipv4_policy: Arc<PortPolicy>,
    /// List of IPv6 ports to which this relay will exit.
    ///
    /// Defaults to "reject all" when the microdescriptor has no `p6` line.
    #[cfg_attr(docsrs, doc(cfg(feature = "dangerous-expose-struct-fields")))]
    ipv6_policy: Arc<PortPolicy>,
    /// Ed25519 identity for this relay, taken from its `id ed25519` line.
    #[cfg_attr(docsrs, doc(cfg(feature = "dangerous-expose-struct-fields")))]
    ed25519_id: ed25519::Ed25519Identity,
    /// Family identities for this relay, taken from its `family-ids` line.
    ///
    /// Empty when that line is absent or has no arguments.
    #[cfg_attr(docsrs, doc(cfg(feature = "dangerous-expose-struct-fields")))]
    family_ids: Vec<RelayFamilyId>,
    // addr is obsolete and doesn't go here any more
    // pr is obsolete and doesn't go here any more.
    // The legacy "tap" onion-key is obsolete, and though we parse it, we don't
    // save it.
}
97
98impl Microdesc {
99    /// Create a new MicrodescBuilder that can be used to construct
100    /// microdescriptors.
101    ///
102    /// This function is only available when the crate is built with the
103    /// `build_docs` feature.
104    ///
105    /// # Limitations
106    ///
107    /// The generated microdescriptors cannot yet be encoded, and do
108    /// not yet have correct sha256 digests. As such they are only
109    /// useful for testing.
110    #[cfg(feature = "build_docs")]
111    pub fn builder() -> MicrodescBuilder {
112        MicrodescBuilder::new()
113    }
114
115    /// Return the sha256 digest of this microdesc.
116    pub fn digest(&self) -> &MdDigest {
117        &self.sha256
118    }
119    /// Return the ntor onion key for this microdesc
120    pub fn ntor_key(&self) -> &curve25519::PublicKey {
121        &self.ntor_onion_key
122    }
123    /// Return the ipv4 exit policy for this microdesc
124    pub fn ipv4_policy(&self) -> &Arc<PortPolicy> {
125        &self.ipv4_policy
126    }
127    /// Return the ipv6 exit policy for this microdesc
128    pub fn ipv6_policy(&self) -> &Arc<PortPolicy> {
129        &self.ipv6_policy
130    }
131    /// Return the relay family for this microdesc
132    pub fn family(&self) -> &RelayFamily {
133        self.family.as_ref()
134    }
135    /// Return the ed25519 identity for this microdesc, if its
136    /// Ed25519 identity is well-formed.
137    pub fn ed25519_id(&self) -> &ed25519::Ed25519Identity {
138        &self.ed25519_id
139    }
140    /// Return a list of family ids for this microdesc.
141    pub fn family_ids(&self) -> &[RelayFamilyId] {
142        &self.family_ids[..]
143    }
144}
145
/// A microdescriptor annotated with additional data
///
/// This is the item type yielded by [`MicrodescReader`].
///
/// TODO: rename this.
#[allow(dead_code)]
#[derive(Clone, Debug)]
pub struct AnnotatedMicrodesc {
    /// The microdescriptor
    md: Microdesc,
    /// The annotations for the microdescriptor
    ///
    /// This holds default (empty) annotations when the reader was not
    /// asked to accept annotations.
    ann: MicrodescAnnotation,
    /// Where did we find the microdescriptor with the originally parsed
    /// string?
    ///
    /// Used by [`AnnotatedMicrodesc::within`] to recover the original text.
    location: Option<Extent>,
}
160
161impl AnnotatedMicrodesc {
162    /// Consume this annotated microdesc and discard its annotations.
163    pub fn into_microdesc(self) -> Microdesc {
164        self.md
165    }
166
167    /// Return a reference to the microdescriptor within this annotated
168    /// microdescriptor.
169    pub fn md(&self) -> &Microdesc {
170        &self.md
171    }
172
173    /// If this Microdesc was parsed from `s`, return its original text.
174    pub fn within<'a>(&self, s: &'a str) -> Option<&'a str> {
175        self.location.as_ref().and_then(|ext| ext.reconstruct(s))
176    }
177}
178
// Declares the `MicrodescKwd` keyword type that the tokenizer and the
// section rules in this file are parameterized over.
//
// NOTE(review): the rules in this file also use `UNRECOGNIZED` and
// `ANN_UNRECOGNIZED`, which are not listed here; presumably
// `decl_keyword!` generates them -- confirm against the macro definition.
decl_keyword! {
    /// Keyword type for recognized objects in microdescriptors.
    MicrodescKwd {
        annotation "@last-listed" => ANN_LAST_LISTED,
        "onion-key" => ONION_KEY,
        "ntor-onion-key" => NTOR_ONION_KEY,
        "family" => FAMILY,
        "family-ids" => FAMILY_IDS,
        "p" => P,
        "p6" => P6,
        "id" => ID,
    }
}
192
/// Rules about annotations that can appear before a Microdescriptor
///
/// Built lazily on first use.
static MICRODESC_ANNOTATIONS: LazyLock<SectionRules<MicrodescKwd>> = LazyLock::new(|| {
    use MicrodescKwd::*;
    let mut rules = SectionRules::builder();
    // `@last-listed` takes at least one argument (the timestamp text).
    rules.add(ANN_LAST_LISTED.rule().args(1..));
    // Any other annotation may appear any number of times, with or without
    // an object.
    rules.add(ANN_UNRECOGNIZED.rule().may_repeat().obj_optional());
    // unrecognized annotations are okay; anything else is a bug in this
    // context.
    rules.reject_unrecognized();
    rules.build()
});
/// Rules about entries that must appear in a Microdesc, and how they must
/// be formed.
///
/// Built lazily on first use.
static MICRODESC_RULES: LazyLock<SectionRules<MicrodescKwd>> = LazyLock::new(|| {
    use MicrodescKwd::*;

    let mut rules = SectionRules::builder();
    // `onion-key` is mandatory and takes no arguments; its object (the
    // legacy TAP key) is optional.
    rules.add(ONION_KEY.rule().required().no_args().obj_optional());
    // `ntor-onion-key` is mandatory and needs at least one argument.
    rules.add(NTOR_ONION_KEY.rule().required().args(1..));
    // Everything below is optional.
    rules.add(FAMILY.rule().args(1..));
    // `family-ids` may legitimately have an empty argument list.
    rules.add(FAMILY_IDS.rule().args(0..));
    rules.add(P.rule().args(2..));
    rules.add(P6.rule().args(2..));
    // `id` may appear more than once; the parser looks for the one whose
    // first argument is "ed25519".
    rules.add(ID.rule().may_repeat().args(2..));
    // Unknown keywords are tolerated, with or without an object.
    rules.add(UNRECOGNIZED.rule().may_repeat().obj_optional());
    rules.build()
});
220
221impl MicrodescAnnotation {
222    /// Extract a (possibly empty) microdescriptor annotation from a
223    /// reader.
224    #[allow(dead_code)]
225    fn parse_from_reader(
226        reader: &mut NetDocReader<'_, MicrodescKwd>,
227    ) -> Result<MicrodescAnnotation> {
228        use MicrodescKwd::*;
229
230        let mut items = reader.pause_at(|item| item.is_ok_with_non_annotation());
231        let body = MICRODESC_ANNOTATIONS.parse(&mut items)?;
232
233        let last_listed = match body.get(ANN_LAST_LISTED) {
234            None => None,
235            Some(item) => Some(item.args_as_str().parse::<Iso8601TimeSp>()?.into()),
236        };
237
238        Ok(MicrodescAnnotation { last_listed })
239    }
240}
241
242impl Microdesc {
243    /// Parse a string into a new microdescriptor.
244    pub fn parse(s: &str) -> Result<Microdesc> {
245        let mut items = crate::parse::tokenize::NetDocReader::new(s)?;
246        let (result, _) = Self::parse_from_reader(&mut items).map_err(|e| e.within(s))?;
247        items.should_be_exhausted()?;
248        Ok(result)
249    }
250
251    /// Extract a single microdescriptor from a NetDocReader.
252    fn parse_from_reader(
253        reader: &mut NetDocReader<'_, MicrodescKwd>,
254    ) -> Result<(Microdesc, Option<Extent>)> {
255        use MicrodescKwd::*;
256        let s = reader.str();
257
258        let mut first_onion_key = true;
259        // We'll pause at the next annotation, or at the _second_ onion key.
260        let mut items = reader.pause_at(|item| match item {
261            Err(_) => false,
262            Ok(item) => {
263                item.kwd().is_annotation()
264                    || if item.kwd() == ONION_KEY {
265                        let was_first = first_onion_key;
266                        first_onion_key = false;
267                        !was_first
268                    } else {
269                        false
270                    }
271            }
272        });
273
274        let body = MICRODESC_RULES.parse(&mut items)?;
275
276        // We have to start with onion-key
277        let start_pos = {
278            // unwrap here is safe because parsing would have failed
279            // had there not been at least one item.
280            #[allow(clippy::unwrap_used)]
281            let first = body.first_item().unwrap();
282            if first.kwd() != ONION_KEY {
283                return Err(EK::WrongStartingToken
284                    .with_msg(first.kwd_str().to_string())
285                    .at_pos(first.pos()));
286            }
287            // Unwrap is safe here because we are parsing these strings from s
288            #[allow(clippy::unwrap_used)]
289            util::str::str_offset(s, first.kwd_str()).unwrap()
290        };
291
292        // Legacy (tap) onion key.  We parse this to make sure it's well-formed,
293        // but then we discard it immediately, since we never want to use it.
294        //
295        // In microdescriptors, the ONION_KEY field is mandatory, but its
296        // associated object is optional.
297        {
298            let tok = body.required(ONION_KEY)?;
299            if tok.has_obj() {
300                let _: rsa::PublicKey = tok
301                    .parse_obj::<RsaPublic>("RSA PUBLIC KEY")?
302                    .check_len_eq(1024)?
303                    .check_exponent(65537)?
304                    .into();
305            }
306        }
307
308        // Ntor onion key
309        let ntor_onion_key = body
310            .required(NTOR_ONION_KEY)?
311            .parse_arg::<Curve25519Public>(0)?
312            .into();
313
314        // family
315        //
316        // (We don't need to add the relay's own ID to this family, as we do in
317        // RouterDescs: the authorities already took care of that for us.)
318        let family = body
319            .maybe(FAMILY)
320            .parse_args_as_str::<RelayFamily>()?
321            .unwrap_or_else(RelayFamily::new)
322            .intern();
323
324        // Family ids (happy families case).
325        let family_ids = body
326            .maybe(FAMILY_IDS)
327            .args_as_str()
328            .unwrap_or("")
329            .split_ascii_whitespace()
330            .map(RelayFamilyId::from_str)
331            .collect::<Result<_>>()?;
332
333        // exit policies.
334        let ipv4_policy = body
335            .maybe(P)
336            .parse_args_as_str::<PortPolicy>()?
337            .unwrap_or_else(PortPolicy::new_reject_all);
338        let ipv6_policy = body
339            .maybe(P6)
340            .parse_args_as_str::<PortPolicy>()?
341            .unwrap_or_else(PortPolicy::new_reject_all);
342
343        // ed25519 identity
344        let ed25519_id = {
345            let id_tok = body
346                .slice(ID)
347                .iter()
348                .find(|item| item.arg(0) == Some("ed25519"));
349            match id_tok {
350                None => {
351                    return Err(EK::MissingToken.with_msg("id ed25519"));
352                }
353                Some(tok) => tok.parse_arg::<Ed25519Public>(1)?.into(),
354            }
355        };
356
357        let end_pos = {
358            // unwrap here is safe because parsing would have failed
359            // had there not been at least one item.
360            #[allow(clippy::unwrap_used)]
361            let last_item = body.last_item().unwrap();
362            last_item.offset_after(s).ok_or_else(|| {
363                Error::from(internal!("last item was not within source string"))
364                    .at_pos(last_item.end_pos())
365            })?
366        };
367
368        let text = &s[start_pos..end_pos];
369        let sha256 = d::Sha256::digest(text.as_bytes()).into();
370
371        let location = Extent::new(s, text);
372
373        let md = Microdesc {
374            sha256,
375            ntor_onion_key,
376            family,
377            ipv4_policy: ipv4_policy.intern(),
378            ipv6_policy: ipv6_policy.intern(),
379            ed25519_id,
380            family_ids,
381        };
382        Ok((md, location))
383    }
384}
385
386/// Consume tokens from 'reader' until the next token is the beginning
387/// of a microdescriptor: an annotation or an ONION_KEY.  If no such
388/// token exists, advance to the end of the reader.
389fn advance_to_next_microdesc(reader: &mut NetDocReader<'_, MicrodescKwd>, annotated: bool) {
390    use MicrodescKwd::*;
391    loop {
392        let item = reader.peek();
393        match item {
394            Some(Ok(t)) => {
395                let kwd = t.kwd();
396                if (annotated && kwd.is_annotation()) || kwd == ONION_KEY {
397                    return;
398                }
399            }
400            Some(Err(_)) => {
401                // We skip over broken tokens here.
402                //
403                // (This case can't happen in practice, since if there had been
404                // any error tokens, they would have been handled as part of
405                // handling the previous microdesc.)
406            }
407            None => {
408                return;
409            }
410        };
411        let _ = reader.next();
412    }
413}
414
/// An iterator that parses one or more (possibly annotated)
/// microdescriptors from a string.
///
/// Each call to `next()` yields one `Result<AnnotatedMicrodesc>`.  After a
/// failed microdescriptor the reader skips ahead to the next possible
/// microdescriptor start, so later items can still be parsed.
#[derive(Debug)]
pub struct MicrodescReader<'a> {
    /// True if we accept annotations; false otherwise.
    annotated: bool,
    /// An underlying reader to give us Items for the microdescriptors
    reader: NetDocReader<'a, MicrodescKwd>,
}
424
425impl<'a> MicrodescReader<'a> {
426    /// Construct a MicrodescReader to take microdescriptors from a string
427    /// 's'.
428    pub fn new(s: &'a str, allow: &AllowAnnotations) -> Result<Self> {
429        let reader = NetDocReader::new(s)?;
430        let annotated = allow == &AllowAnnotations::AnnotationsAllowed;
431        Ok(MicrodescReader { annotated, reader })
432    }
433
434    /// If we're annotated, parse an annotation from the reader. Otherwise
435    /// return a default annotation.
436    fn take_annotation(&mut self) -> Result<MicrodescAnnotation> {
437        if self.annotated {
438            MicrodescAnnotation::parse_from_reader(&mut self.reader)
439        } else {
440            Ok(MicrodescAnnotation::default())
441        }
442    }
443
444    /// Parse a (possibly annotated) microdescriptor from the reader.
445    ///
446    /// On error, parsing stops after the first failure.
447    fn take_annotated_microdesc_raw(&mut self) -> Result<AnnotatedMicrodesc> {
448        let ann = self.take_annotation()?;
449        let (md, location) = Microdesc::parse_from_reader(&mut self.reader)?;
450        Ok(AnnotatedMicrodesc { md, ann, location })
451    }
452
453    /// Parse a (possibly annotated) microdescriptor from the reader.
454    ///
455    /// On error, advance the reader to the start of the next microdescriptor.
456    fn take_annotated_microdesc(&mut self) -> Result<AnnotatedMicrodesc> {
457        let pos_orig = self.reader.pos();
458        let result = self.take_annotated_microdesc_raw();
459        if result.is_err() {
460            if self.reader.pos() == pos_orig {
461                // No tokens were consumed from the reader.  We need to
462                // drop at least one token to ensure we aren't looping.
463                //
464                // (This might not be able to happen, but it's easier to
465                // explicitly catch this case than it is to prove that
466                // it's impossible.)
467                let _ = self.reader.next();
468            }
469            advance_to_next_microdesc(&mut self.reader, self.annotated);
470        }
471        result
472    }
473}
474
475impl<'a> Iterator for MicrodescReader<'a> {
476    type Item = Result<AnnotatedMicrodesc>;
477    fn next(&mut self) -> Option<Self::Item> {
478        // If there is no next token, we're at the end.
479        self.reader.peek()?;
480
481        Some(
482            self.take_annotated_microdesc()
483                .map_err(|e| e.within(self.reader.str())),
484        )
485    }
486}
487
#[cfg(test)]
mod test {
    // @@ begin test lint list maintained by maint/add_warning @@
    #![allow(clippy::bool_assert_comparison)]
    #![allow(clippy::clone_on_copy)]
    #![allow(clippy::dbg_macro)]
    #![allow(clippy::mixed_attributes_style)]
    #![allow(clippy::print_stderr)]
    #![allow(clippy::print_stdout)]
    #![allow(clippy::single_char_pattern)]
    #![allow(clippy::unwrap_used)]
    #![allow(clippy::unchecked_duration_subtraction)]
    #![allow(clippy::useless_vec)]
    #![allow(clippy::needless_pass_by_value)]
    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
    use super::*;
    use hex_literal::hex;
    // Well-formed test inputs, embedded at compile time.
    const TESTDATA: &str = include_str!("../../testdata/microdesc1.txt");
    const TESTDATA2: &str = include_str!("../../testdata/microdesc2.txt");
    const TESTDATA3: &str = include_str!("../../testdata/microdesc3.txt");
    const TESTDATA4: &str = include_str!("../../testdata/microdesc4.txt");

    /// Load the malformed test input `testdata/bad-mds/<fname>` (relative
    /// to the crate's manifest directory) as a string.
    fn read_bad(fname: &str) -> String {
        use std::fs;
        use std::path::PathBuf;
        let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        path.push("testdata");
        path.push("bad-mds");
        path.push(fname);

        fs::read_to_string(path).unwrap()
    }

    /// A single well-formed microdescriptor parses without error.
    #[test]
    fn parse_single() -> Result<()> {
        let _md = Microdesc::parse(TESTDATA)?;
        Ok(())
    }

    /// `Microdesc::parse` accepts the contents of `microdesc3.txt`.
    #[test]
    fn parse_no_tap_key() -> Result<()> {
        let _md = Microdesc::parse(TESTDATA3)?;
        Ok(())
    }

    /// Several annotated microdescriptors can be read from one string, and
    /// the first one has the expected annotation, digest, keys and policies.
    #[test]
    fn parse_multi() -> Result<()> {
        use humantime::parse_rfc3339;
        let mds: Result<Vec<_>> =
            MicrodescReader::new(TESTDATA2, &AllowAnnotations::AnnotationsAllowed)?.collect();
        let mds = mds?;
        assert_eq!(mds.len(), 4);

        assert_eq!(
            mds[0].ann.last_listed.unwrap(),
            parse_rfc3339("2020-01-27T18:52:09Z").unwrap()
        );
        assert_eq!(
            mds[0].md().digest(),
            &hex!("38c71329a87098cb341c46c9c62bd646622b4445f7eb985a0e6adb23a22ccf4f")
        );
        assert_eq!(
            mds[0].md().ntor_key().as_bytes(),
            &hex!("5e895d65304a3a1894616660143f7af5757fe08bc18045c7855ee8debb9e6c47")
        );
        assert!(mds[0].md().ipv4_policy().allows_port(993));
        assert!(mds[0].md().ipv6_policy().allows_port(993));
        assert!(!mds[0].md().ipv4_policy().allows_port(25));
        assert!(!mds[0].md().ipv6_policy().allows_port(25));
        assert_eq!(
            mds[0].md().ed25519_id().as_bytes(),
            &hex!("2d85fdc88e6c1bcfb46897fca1dba6d1354f93261d68a79e0b5bc170dd923084")
        );

        Ok(())
    }

    /// `family-ids` lines are parsed into `RelayFamilyId`s; the first
    /// microdescriptor in the input ends up with an empty list.
    #[test]
    fn parse_family_ids() -> Result<()> {
        let mds: Vec<AnnotatedMicrodesc> =
            MicrodescReader::new(TESTDATA4, &AllowAnnotations::AnnotationsNotAllowed)?
                .collect::<Result<_>>()?;
        assert_eq!(mds.len(), 2);
        let md0 = mds[0].md();
        let md1 = mds[1].md();
        assert_eq!(md0.family_ids().len(), 0);
        assert_eq!(
            md1.family_ids(),
            &[
                "ed25519:dXMgdGhlIHRyaXVtcGguICAgIC1UaG9tYXMgUGFpbmU"
                    .parse()
                    .unwrap(),
                "other:Example".parse().unwrap()
            ]
        );
        assert!(matches!(md1.family_ids()[0], RelayFamilyId::Ed25519(_)));

        Ok(())
    }

    /// Malformed microdescriptors are rejected with the expected errors.
    #[test]
    fn test_bad() {
        use crate::Pos;
        use crate::types::policy::PolicyError;
        // Parse the bad input `fname` and require that it fails with
        // exactly the error `e`.
        fn check(fname: &str, e: &Error) {
            let content = read_bad(fname);
            let res = Microdesc::parse(&content);
            assert!(res.is_err());
            assert_eq!(&res.err().unwrap(), e);
        }

        check(
            "wrong-start",
            &EK::WrongStartingToken
                .with_msg("family")
                .at_pos(Pos::from_line(1, 1)),
        );
        check(
            "bogus-policy",
            &EK::BadPolicy
                .at_pos(Pos::from_line(9, 1))
                .with_source(PolicyError::InvalidPort),
        );
        check("wrong-id", &EK::MissingToken.with_msg("id ed25519"));
    }

    /// A `MicrodescReader` keeps going after a bad microdescriptor: a good
    /// one sandwiched between two bad ones is still parsed.
    #[test]
    fn test_recover() -> Result<()> {
        let mut data = read_bad("wrong-start");
        data += TESTDATA;
        data += &read_bad("wrong-id");

        let res: Vec<Result<_>> =
            MicrodescReader::new(&data, &AllowAnnotations::AnnotationsAllowed)?.collect();

        assert_eq!(res.len(), 3);
        assert!(res[0].is_err());
        assert!(res[1].is_ok());
        assert!(res[2].is_err());
        Ok(())
    }
}