tor_netdoc/parse2/
lex.rs

1//! Lexing of netdoc elements
2
3use super::*;
4
/// Linear whitespace as defined by torspec
///
/// Space and horizontal tab.  Within this module it is used as a `char`-slice pattern,
/// both to split a keyword from its arguments and to trim leading whitespace
/// from the remaining arguments.
// Only pub via internal_prelude, for benefit of macros
pub const WS: &[char] = &[' ', '\t'];
8
/// Top-level reader: Netdoc text interpreted as a stream of items
///
/// Items can be obtained with `next_item`, or via the `Iterator` implementation
/// (which simply calls `next_item`).
#[derive(Debug, Clone)]
pub struct ItemStream<'s> {
    /// The whole document.  Used for signature hashing.
    ///
    /// `body_sofar_for_signature` returns this string
    /// with the not-yet-consumed remainder of `lines` stripped from its end.
    whole_for_signatures: &'s str,
    /// Remaining document, as a stream of lines
    lines: Lines<'s>,
    /// If we have peeked ahead, what we discovered
    peeked: PeekState<'s>,
}
19
/// Whether an `ItemStream` has peeked ahead, and if so what it discovered
#[derive(Debug, Clone)]
enum PeekState<'s> {
    /// We've peeked a line
    ///
    /// Holds the keyword found on that line, and what is needed to consume the line later.
    Some(ItemStreamPeeked<'s>),
    /// We've not peeked, or peeking gave `None`
    None {
        /// Line number of the last item we yielded.
        ///
        /// `0` at the start.
        ///
        /// This is what `lno_for_error` reports while nothing is peeked.
        yielded_item_lno: usize,
    },
}
33
/// If an `ItemStream` has peeked ahead, what it discovered
#[derive(Debug, Clone)]
struct ItemStreamPeeked<'s> {
    /// The next keyword
    keyword: KeywordRef<'s>,
    /// Token for the peeked line, as returned by `Lines::peek`
    ///
    /// Needed to look at the peeked line again (`Lines::peeked_line`)
    /// and to eventually consume it (`Lines::consume_peeked`).
    line: lines::Peeked,
    /// Length of the suffix of the line that is the arguments rather than the keyword
    ///
    /// Does not include the first whitespace, that terminated the keyword.
    ///
    /// `0` if the line contains no whitespace at all.
    args_len: usize,
}
46
/// An Item that has been lexed but not parsed
///
/// Produced by `ItemStream::next_item`.
#[derive(Debug, Clone, amplify::Getters)]
pub struct UnparsedItem<'s> {
    /// The item's Keyword
    #[getter(as_copy)]
    keyword: KeywordRef<'s>,
    /// The Item's Arguments
    ///
    /// No getter is generated for this field;
    /// use `args_mut` or `args_copy` instead.
    #[getter(skip)]
    args: ArgumentStream<'s>,
    /// The Item's Object, if there was one
    #[getter(as_clone)]
    object: Option<UnparsedObject<'s>>,
}
60
/// Reader for arguments on an Item
///
/// Represents the (remaining) arguments.
///
/// Iterating yields the arguments one at a time,
/// split on linear whitespace (`WS`).
#[derive(Debug, Clone)]
pub struct ArgumentStream<'s> {
    /// The remaining unparsed arguments
    ///
    /// Can start with WS, which is usually trimmed
    rest: &'s str,
}
71
/// An Object that has been lexed but not parsed
///
/// The lexer has checked the delimiter lines, but has not decoded the base64.
#[derive(Debug, Clone, amplify::Getters)]
pub struct UnparsedObject<'s> {
    /// The Label
    ///
    /// Taken from the delimiter lines; the header's and footer's labels
    /// have been checked to be equal.
    #[getter(as_copy)]
    label: &'s str,
    /// The portion of the input document which is base64 data (and newlines)
    ///
    /// No getter is generated for this field; use `decode_data` to obtain the bytes.
    #[getter(skip)]
    data_b64: &'s str,
}
82
83impl<'s> ItemStream<'s> {
84    /// Start reading a network document as a series of Items
85    pub fn new(s: &'s str) -> Result<Self, ParseError> {
86        Ok(ItemStream {
87            whole_for_signatures: s,
88            lines: Lines::new(s),
89            peeked: PeekState::None {
90                yielded_item_lno: 0,
91            },
92        })
93    }
94
95    /// Line number for reporting an error we have just discovered
96    ///
97    /// If we have recent peeked, we report the line number of the peeked keyword line.
98    ///
99    /// Otherwise, we report the line number of the most-recently yielded item.
100    pub fn lno_for_error(&self) -> usize {
101        match self.peeked {
102            PeekState::Some { .. } => {
103                // The error was presumably caused by whatever was seen in the peek.
104                // That's the current line number.
105                self.lines.peek_lno()
106            }
107            PeekState::None { yielded_item_lno } => {
108                // The error was presumably caused by the results of next_item().
109                yielded_item_lno
110            }
111        }
112    }
113
114    /// Core of peeking.  Tries to make `.peeked` be `Some`.
115    fn peek_internal<'i>(&'i mut self) -> Result<(), EP> {
116        if matches!(self.peeked, PeekState::None { .. }) {
117            let Some(peeked) = self.lines.peek() else {
118                return Ok(());
119            };
120
121            let peeked_line = self.lines.peeked_line(&peeked);
122
123            let (keyword, args) = peeked_line.split_once(WS).unwrap_or((peeked_line, ""));
124            let keyword = KeywordRef::new(keyword)?;
125
126            self.peeked = PeekState::Some(ItemStreamPeeked {
127                keyword,
128                line: peeked,
129                args_len: args.len(),
130            });
131        }
132
133        Ok(())
134    }
135
136    /// Peek the next keyword
137    pub fn peek_keyword(&mut self) -> Result<Option<KeywordRef<'s>>, EP> {
138        self.peek_internal()?;
139        let PeekState::Some(peeked) = &self.peeked else {
140            return Ok(None);
141        };
142        Ok(Some(peeked.keyword))
143    }
144
145    /// Obtain the body so far, suitable for hashing for a Regular signature
146    pub fn body_sofar_for_signature(&self) -> SignedDocumentBody<'s> {
147        let body = self
148            .whole_for_signatures
149            .strip_end_counted(self.lines.remaining().len());
150        SignedDocumentBody { body }
151    }
152
153    /// Parse a (sub-)document with its own signatures
154    pub fn parse_signed<
155        B: NetdocParseable,
156        S: NetdocParseable,
157        O: NetdocSigned<Body = B, Signatures = S>,
158    >(
159        &mut self,
160        outer_stop: stop_at!(),
161    ) -> Result<O, EP> {
162        let mut input = ItemStream {
163            whole_for_signatures: &self.whole_for_signatures
164                [self.whole_for_signatures.len() - self.lines.remaining().len()..],
165            ..self.clone()
166        };
167        let r = (|| {
168            let inner_always_stop = outer_stop | StopAt::doc_intro::<B>();
169            let body = B::from_items(&mut input, inner_always_stop | StopAt::doc_intro::<S>())?;
170            let signatures = S::from_items(&mut input, inner_always_stop)?;
171            let signed = O::from_parts(body, signatures);
172            Ok(signed)
173        })(); // don't exit here
174
175        *self = ItemStream {
176            whole_for_signatures: self.whole_for_signatures,
177            ..input
178        };
179
180        r
181    }
182
183    /// Obtain the inputs that would be needed to hash any (even Irregular) signature
184    ///
185    /// These are the hash inputs which would be needed for the next item,
186    /// assuming it's a signature keyword.
187    pub fn peek_signature_hash_inputs(
188        &mut self,
189        body: SignedDocumentBody<'s>,
190    ) -> Result<Option<SignatureHashInputs<'s>>, EP> {
191        self.peek_internal()?;
192        let PeekState::Some(peeked) = &self.peeked else {
193            return Ok(None);
194        };
195        let signature_item_line = self.lines.peeked_line(&peeked.line);
196        let signature_item_kw_spc = signature_item_line.strip_end_counted(peeked.args_len);
197        Ok(Some(SignatureHashInputs {
198            body,
199            signature_item_kw_spc,
200            signature_item_line,
201        }))
202    }
203
204    /// Yield the next item.
205    pub fn next_item(&mut self) -> Result<Option<UnparsedItem<'s>>, EP> {
206        self.peek_internal()?;
207        let peeked = match self.peeked {
208            PeekState::None { .. } => return Ok(None),
209            PeekState::Some { .. } => match mem::replace(
210                &mut self.peeked,
211                PeekState::None {
212                    yielded_item_lno: self.lines.peek_lno(),
213                },
214            ) {
215                PeekState::Some(peeked) => peeked,
216                PeekState::None { .. } => panic!("it was Some just now"),
217            },
218        };
219
220        let keyword = peeked.keyword;
221        let line = self.lines.consume_peeked(peeked.line);
222        let args = &line[keyword.len()..];
223        let args = ArgumentStream::new(args);
224
225        let object = if self.lines.remaining().starts_with('-') {
226            fn pem_delimiter<'s>(lines: &mut Lines<'s>, start: &str) -> Result<&'s str, EP> {
227                let line = lines.next().ok_or(
228                    // If this is the *header*, we already know there's a line,
229                    // so this error path is only for footers.
230                    EP::ObjectMissingFooter,
231                )?;
232                let label = line
233                    .strip_prefix(start)
234                    .ok_or(EP::InvalidObjectDelimiters)?
235                    .strip_suffix(PEM_AFTER_LABEL)
236                    .ok_or(EP::InvalidObjectDelimiters)?;
237                Ok(label)
238            }
239
240            let label1 = pem_delimiter(&mut self.lines, PEM_HEADER_START)?;
241            let base64_start_remaining = self.lines.remaining();
242            while !self.lines.remaining().starts_with('-') {
243                let _: &str = self.lines.next().ok_or(EP::ObjectMissingFooter)?;
244            }
245            let data_b64 = base64_start_remaining.strip_end_counted(self.lines.remaining().len());
246            let label2 = pem_delimiter(&mut self.lines, PEM_FOOTER_START)?;
247            let label = [label1, label2]
248                .into_iter()
249                .all_equal_value()
250                .map_err(|_| EP::ObjectMismatchedLabels)?;
251            Some(UnparsedObject { label, data_b64 })
252        } else {
253            None
254        };
255
256        Ok(Some(UnparsedItem {
257            keyword,
258            args,
259            object,
260        }))
261    }
262}
263
264impl<'s> UnparsedItem<'s> {
265    /// Access the arguments, mutably (for consuming and parsing them)
266    pub fn args_mut(&mut self) -> &mut ArgumentStream<'s> {
267        &mut self.args
268    }
269    /// Access a copy of the arguments
270    ///
271    /// When using this, be careful not to process any arguments twice.
272    pub fn args_copy(&self) -> ArgumentStream<'s> {
273        self.args.clone()
274    }
275}
276
/// End of an argument list that does not accept any further (unknown) arguments
///
/// Implements `ItemArgumentParseable`.  Parses successfully iff the argument list is empty.
///
/// This is also the success value of `ArgumentStream::reject_extra_args`.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
#[allow(clippy::exhaustive_structs)]
pub struct NoFurtherArguments;
283
284impl ItemArgumentParseable for NoFurtherArguments {
285    fn from_args(args: &mut ArgumentStream, _field: &'static str) -> Result<Self, EP> {
286        args.reject_extra_args()
287    }
288}
289
290impl<'s> Iterator for ItemStream<'s> {
291    type Item = Result<UnparsedItem<'s>, EP>;
292    fn next(&mut self) -> Option<Result<UnparsedItem<'s>, EP>> {
293        self.next_item().transpose()
294    }
295}
296
297impl<'s> ArgumentStream<'s> {
298    /// Make a new `ArgumentStream` from a string
299    ///
300    /// The string may start with whitespace (which will be ignored).
301    pub fn new(rest: &'s str) -> Self {
302        ArgumentStream { rest }
303    }
304
305    /// Unwrap this `ArgumnetStream`, giving the remaining arguments as a string
306    ///
307    /// The returned string won't start with whitespace.
308    //
309    /// `self` will be empty on return.
310    // (We don't take `self` by value because that makes use with `UnparsedItem` annoying.)
311    pub fn into_remaining(&mut self) -> &'s str {
312        self.trim_start();
313        self.rest
314    }
315
316    /// Trim leading WS from `rest`
317    fn trim_start(&mut self) {
318        self.rest = self.rest.trim_start_matches(WS);
319    }
320
321    /// Trim leading whitespace, and then see if it's empty
322    pub fn is_nonempty_after_trim_start(&mut self) -> bool {
323        self.trim_start();
324        !self.rest.is_empty()
325    }
326
327    /// Throw and error if there are further arguments
328    //
329    // (We don't take `self` by value because that makes use with `UnparsedItem` annoying.)
330    pub fn reject_extra_args(&mut self) -> Result<NoFurtherArguments, EP> {
331        if self.is_nonempty_after_trim_start() {
332            Err(EP::UnexpectedArgument)
333        } else {
334            Ok(NoFurtherArguments)
335        }
336    }
337}
338
339impl<'s> Iterator for ArgumentStream<'s> {
340    type Item = &'s str;
341    fn next(&mut self) -> Option<&'s str> {
342        if !self.is_nonempty_after_trim_start() {
343            return None;
344        }
345        let arg;
346        (arg, self.rest) = self.rest.split_once(WS).unwrap_or((self.rest, ""));
347        Some(arg)
348    }
349}
350
351impl<'s> UnparsedObject<'s> {
352    /// Obtain the Object data, as decoded bytes
353    pub fn decode_data(&self) -> Result<Vec<u8>, EP> {
354        crate::parse::tokenize::base64_decode_multiline(self.data_b64)
355            .map_err(|_e| EP::ObjectInvalidBase64)
356    }
357}