tor_netdoc/parse2.rs
1//! New netdoc parsing arrangements, with `derive`
2//!
3//! # Parsing principles
4//!
5//! A parseable network document is a type implementing [`NetdocParseable`],
6//! usually via the
7//! [`NetdocParseable` derive-deftly macro](crate::derive_deftly_template_NetdocParseable).
8//!
9//! A document type is responsible for recognising its own heading item.
10//! Its parser will also be told of other structural items that it should not consume.
11//! The structural lines can then be used to pass control to the appropriate parser.
12//!
13//! A "structural item" is a netdoc item that defines the structure of the document.
14//! This includes the intro items for whole documents,
15//! the items that introduce document sections
16//! (which we model by treating the section as a sub-document)
17//! and signature items (which introduce the signatures at the end of the document,
18//! and after which no non-signature items may appear).
19//!
20//! # Ordering
21//!
22//! We don't always parse things into a sorted order.
23//! Sorting will be done when assembling documents, before outputting.
24// TODO we don't implement deriving output yet.
25//!
26//! # Types, and signature handling
27//!
28//! Most top-level network documents are signed somehow.
29//! In this case there are three types:
30//!
31//! * **`FooSigned`**: a signed `Foo`, with its signatures, not yet verified.
32//! Implements [`NetdocSigned`],
33//! typically by invoking the
34//! [`NetdocSigned` derive macro](crate::derive_deftly_template_NetdocSigned)
35//! on `Foo`.
36//!
37//! Type-specific methods are provided for verification,
38//! to obtain a `Foo`.
39//!
40//! * **`Foo`**: the body data for the document.
41//! This doesn't contain any signatures.
42//! Having one of these to play with means signatures have already been validated.
43//! Implements `NetdocParseable`, via
44//! [derive](crate::derive_deftly_template_NetdocParseable).
45//!
46//! * **`FooSignatures`**: the signatures for a `Foo`.
47//! Implements `NetdocParseable`, via
48//! [derive](crate::derive_deftly_template_NetdocParseable),
49//! with `#[deftly(netdoc(signatures))]`.
50//!
51//! # Relationship to tor_netdoc::parse
52//!
53//! This is a completely new parsing approach, based on different principles.
54//! The key principle is the recognition of "structural keywords",
55//! recursively within a parsing stack, via the [`NetdocParseable`] trait.
56//!
57//! This allows the parser to be derived. We have type-driven parsing
58//! of whole Documents, Items, and their Arguments and Objects,
59//! including of their multiplicity.
60//!
61//! The different keyword handling means we can't use most of the existing lexer,
62//! and need new item parsing API:
63//!
64//! * [`NetdocParseable`] trait.
65//! * [`KeywordRef`] type.
66//! * [`ItemStream`], [`UnparsedItem`], [`ArgumentStream`], [`UnparsedObject`].
67//!
68//! The different error handling means we have our own error types.
69//! (The crate's existing parse error type has information that we don't track,
70//! and is also a portmanteau error for parsing, writing, and other functions.)
71//!
72//! Document signing is handled in a more abstract way.
73//!
74//! Some old netdoc constructs are not supported.
75//! For example, the obsolete `opt` prefix on safe-to-ignore Items.
76//! The parser may make different decisions about netdocs with anomalous item ordering.
77
78#[doc(hidden)]
79#[macro_use]
80pub mod internal_prelude;
81
82#[macro_use]
83mod structural;
84
85#[macro_use]
86mod derive;
87
88mod error;
89mod impls;
90pub mod keyword;
91mod lex;
92mod lines;
93pub mod multiplicity;
94mod signatures;
95mod traits;
96
97#[cfg(feature = "plain-consensus")]
98pub mod poc;
99
100use internal_prelude::*;
101
102pub use error::{ArgumentError, ErrorProblem, ParseError, UnexpectedArgument, VerifyFailed};
103pub use impls::raw_data_object;
104pub use impls::times::NdaSystemTimeDeprecatedSyntax;
105pub use keyword::KeywordRef;
106pub use lex::{ArgumentStream, ItemStream, NoFurtherArguments, UnparsedItem, UnparsedObject};
107pub use lines::{Lines, Peeked, StrExt};
108pub use signatures::{
109 SignatureHashInputs, SignatureItemParseable, check_validity_time, sig_hash_methods,
110};
111pub use structural::{StopAt, StopPredicate};
112pub use traits::{
113 ItemArgumentParseable, ItemObjectParseable, ItemValueParseable, NetdocParseable,
114 NetdocParseableFields, NetdocSigned,
115};
116
117#[doc(hidden)]
118pub use derive::netdoc_parseable_derive_debug;
119
120//---------- input ----------
121
122/// Options for parsing
123///
124/// Specific document and type parsing methods may use these parameters
125/// to control their parsing behaviour at run-time.
126#[derive(educe::Educe, Debug, Clone)]
127#[allow(clippy::manual_non_exhaustive)]
128#[educe(Default)]
129pub struct ParseOptions {
130 /// Retain unknown values?
131 ///
132 /// Some field types, especially for flags fields, have the capability to retain
133 /// unknown flags. But, whereas known flags can be represented as single bits,
134 /// representing unknown flags involves allocating and copying strings.
135 /// Unless the document is to be reproduced, this is a waste of effort.
136 ///
137 /// Each document field type affected by this option should store the unknowns
138 /// as `Unknown<HashSet<String>>` or similar.
139 ///
140 /// This feature should only be used where performance is important.
141 /// For example, it is useful for types that appear in md consensus routerdescs,
142 /// but less useful for types that appear only in a netstatus preamble.
143 ///
144 /// This is currently used for router flags.
145 #[educe(Default(expression = "Unknown::new_discard()"))]
146 pub retain_unknown_values: Unknown<()>,
147
148 // Like `#[non_exhaustive]`, but doesn't prevent use of struct display syntax with `..`
149 #[doc(hidden)]
150 _private_non_exhaustive: (),
151}
152
153/// Input to a network document top-level parsing operation
154pub struct ParseInput<'s> {
155 /// The actual document text
156 input: &'s str,
157 /// Filename (for error reporting)
158 file: &'s str,
159 /// Parsing options
160 options: ParseOptions,
161}
162
163impl<'s> ParseInput<'s> {
164 /// Prepare to parse an input string
165 pub fn new(input: &'s str, file: &'s str) -> Self {
166 ParseInput {
167 input,
168 file,
169 options: ParseOptions::default(),
170 }
171 }
172}
173
174//---------- parser ----------
175
176/// Common code for `parse_netdoc` and `parse_netdoc_multiple`
177///
178/// Creates the `ItemStream`, calls `parse_completely`, and handles errors.
179fn parse_internal<T, D: NetdocParseable>(
180 input: &ParseInput<'_>,
181 parse_completely: impl FnOnce(&mut ItemStream) -> Result<T, ErrorProblem>,
182) -> Result<T, ParseError> {
183 let mut items = ItemStream::new(input)?;
184 parse_completely(&mut items).map_err(|problem| ParseError {
185 problem,
186 doctype: D::doctype_for_error(),
187 file: input.file.to_owned(),
188 lno: items.lno_for_error(),
189 column: problem.column(),
190 })
191}
192
193/// Parse a network document - **toplevel entrypoint**
194pub fn parse_netdoc<D: NetdocParseable>(input: &ParseInput<'_>) -> Result<D, ParseError> {
195 parse_internal::<_, D>(input, |items| {
196 let doc = D::from_items(items, StopAt(false))?;
197 if let Some(_kw) = items.peek_keyword()? {
198 return Err(EP::MultipleDocuments);
199 }
200 Ok(doc)
201 })
202}
203
204/// Parse a network document - **toplevel entrypoint**
205pub fn parse_netdoc_multiple<D: NetdocParseable>(
206 input: &ParseInput<'_>,
207) -> Result<Vec<D>, ParseError> {
208 parse_internal::<_, D>(input, |items| {
209 let mut docs = vec![];
210 while items.peek_keyword()?.is_some() {
211 let doc = D::from_items(items, StopAt(false))?;
212 docs.push(doc);
213 }
214 Ok(docs)
215 })
216}