1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
//! Lexing of netdoc elements
use super::*;
/// Linear whitespace as defined by torspec
///
/// The characters which may separate a keyword from its arguments,
/// and arguments from each other, on an item line: space and horizontal tab.
// Only pub via internal_prelude, for benefit of macros
pub const WS: &[char] = &[' ', '\t'];
// NOTE: this is a derive-deftly template.  Plain `//` comments here are stripped
// by the Rust lexer before the template engine sees them; `///` lines become
// `#[doc]` attributes that are part of the template *output*, so do not add any.
define_derive_deftly! {
/// Define `parse_options` accessor
///
/// The driver must have a lifetime named `'s`, which is suitable for the returned
/// `&'s ParseOptions`.
///
/// # Top-level attributes:
///
/// * **`#[deftly(parse_options(field = ".field.field"))]`**, default `.options`
ParseOptions beta_deftly, expect items:
impl<$tgens> $ttype {
/// Examine the parsing options
pub fn parse_options(&self) -> &'s ParseOptions {
// Expands to `&self.options` by default, or to `&self<field-path>` as
// given by the `#[deftly(parse_options(field = "..."))]` attribute.
&self
${tmeta(parse_options(field))
as token_stream,
default { .options }}
}
}
}
/// Top-level reader: Netdoc text interpreted as a stream of items
#[derive(Debug, Clone, Deftly)]
#[derive_deftly(ParseOptions)]
pub struct ItemStream<'s> {
/// The whole input document.
///
/// Retained so we can compute byte offsets and hashable prefixes
/// (see `byte_position` and `body_sofar_for_signature`).
whole_input: &'s str,
/// Remaining document, as a stream of lines
lines: Lines<'s>,
/// If we have peeked ahead, what we discovered
peeked: PeekState<'s>,
/// Parsing options.
options: &'s ParseOptions,
}
/// Whether an `ItemStream` has peeked ahead, and if so what it discovered
#[derive(Debug, Clone)]
enum PeekState<'s> {
/// We've peeked a line
Some(ItemStreamPeeked<'s>),
/// We've not peeked, or peeking gave `None`
None {
/// Line number of the last item we yielded.
///
/// `0` at the start.
///
/// Used by `ItemStream::lno_for_error` when there is no peeked line to blame.
yielded_item_lno: usize,
},
}
/// If an `ItemStream` has peeked ahead, what it discovered
#[derive(Debug, Clone)]
struct ItemStreamPeeked<'s> {
/// The next keyword
keyword: KeywordRef<'s>,
/// Token proving that we have peeked a line,
/// needed to re-read or consume it (`Lines::peeked_line`, `Lines::consume_peeked`)
line: lines::Peeked,
/// Length of the suffix of the line that is the arguments rather than the keyword
///
/// Does not include the first whitespace, that terminated the keyword.
args_len: usize,
}
/// An Item that has been lexed but not parsed
#[derive(Debug, Clone, amplify::Getters, Deftly)]
#[derive_deftly(ParseOptions)]
#[deftly(parse_options(field = ".args.options"))]
pub struct UnparsedItem<'s> {
/// The item's Keyword
#[getter(as_copy)]
keyword: KeywordRef<'s>,
/// The Item's Arguments
// No derived getter: accessors with doc'd caveats are provided manually
// (`args`, `args_mut`, `args_copy`).
#[getter(skip)]
args: ArgumentStream<'s>,
/// The Item's Object, if there was one
#[getter(as_clone)]
object: Option<UnparsedObject<'s>>,
}
/// Reader for arguments on an Item
///
/// Represents the (remaining) arguments.
#[derive(Debug, Clone, Deftly)]
#[derive_deftly(ParseOptions)]
pub struct ArgumentStream<'s> {
/// The remaining unparsed arguments
///
/// Can start with WS, which is usually trimmed
rest: &'s str,
/// Original line length
///
/// Used for reporting column of argument errors.
whole_line_len: usize,
/// Remaining length *before* we last yielded.
///
/// Lets `prev_arg_column` report the column of the most recently yielded argument.
previous_rest_len: usize,
/// Parsing options.
options: &'s ParseOptions,
}
/// An Object that has been lexed but not parsed
#[derive(Debug, Clone, amplify::Getters, Deftly)]
#[derive_deftly(ParseOptions)]
pub struct UnparsedObject<'s> {
/// The Label
#[getter(as_copy)]
label: &'s str,
/// The portion of the input document which is base64 data (and newlines)
// No derived getter: decoded access is via `decode_data`.
#[getter(skip)]
data_b64: &'s str,
/// Parsing options.
options: &'s ParseOptions,
}
impl<'s> ItemStream<'s> {
/// Start reading a network document as a series of Items
///
/// Borrows both the document text and the parsing options from `input`.
pub fn new(input: &'s ParseInput<'s>) -> Result<Self, ParseError> {
Ok(ItemStream {
whole_input: input.input,
lines: Lines::new(input.input),
// Nothing yielded yet, so the "last yielded" line number is 0.
peeked: PeekState::None {
yielded_item_lno: 0,
},
options: &input.options,
})
}
/// Line number for reporting an error we have just discovered
///
/// If we have recently peeked, we report the line number of the peeked keyword line.
///
/// Otherwise, we report the line number of the most-recently yielded item.
pub fn lno_for_error(&self) -> usize {
match self.peeked {
PeekState::Some { .. } => {
// The error was presumably caused by whatever was seen in the peek.
// That's the current line number.
self.lines.peek_lno()
}
PeekState::None { yielded_item_lno } => {
// The error was presumably caused by the results of next_item().
yielded_item_lno
}
}
}
/// Core of peeking. Tries to make `.peeked` be `Some`.
///
/// After a successful return, `.peeked` is `Some` unless the input is exhausted.
/// Fails iff the next line does not start with a valid keyword.
fn peek_internal<'i>(&'i mut self) -> Result<(), EP> {
if matches!(self.peeked, PeekState::None { .. }) {
let Some(peeked) = self.lines.peek() else {
// End of input: leave `.peeked` as `None`.
return Ok(());
};
let peeked_line = self.lines.peeked_line(&peeked);
// An item line is `KEYWORD [WS ARGUMENTS...]`; if there is no WS,
// the whole line is the keyword and the arguments are empty.
let (keyword, args) = peeked_line.split_once(WS).unwrap_or((peeked_line, ""));
let keyword = KeywordRef::new(keyword)?;
self.peeked = PeekState::Some(ItemStreamPeeked {
keyword,
line: peeked,
args_len: args.len(),
});
}
Ok(())
}
/// Peek the next keyword
///
/// Returns `Ok(None)` at end of input.
pub fn peek_keyword(&mut self) -> Result<Option<KeywordRef<'s>>, EP> {
self.peek_internal()?;
let PeekState::Some(peeked) = &self.peeked else {
return Ok(None);
};
Ok(Some(peeked.keyword))
}
/// Obtain the body so far, suitable for hashing for an Orderly signature
///
/// The body is the prefix of the input up to (but not including)
/// the start of the next item to be yielded.
pub fn body_sofar_for_signature(&self) -> SignedDocumentBody<'s> {
let body = &self.whole_input[0..self.byte_position()];
SignedDocumentBody { body }
}
/// Byte position, pointing to the start of the next item to yield
///
/// Offset in bytes from the start of the original input string
/// to the "current" position,
/// ie to just after the item we yielded and just before the next item (or EOF).
pub fn byte_position(&self) -> usize {
// Everything except the unconsumed suffix of `lines` has been yielded.
self.whole_input.len() - self.lines.remaining().len()
}
/// Access for the entire input string
///
/// The original `input: &str` argument to [`ParseInput::new`].
///
/// Includes both yielded and unyielded items.
pub fn whole_input(&self) -> &'s str {
self.whole_input
}
/// Parse a (sub-)document with its own signatures
///
/// Used (mostly) by the
/// [`NetdocParseableUnverified`](derive_deftly_template_NetdocParseableUnverified)
/// derive macro.
///
/// Generic parameters:
///
/// * **`B`**: the body type: the type to which `NetdocParseableUnverified` is applied.
/// * **`S`**: the signatures section type.
/// * **`O`**: the `FooUnverified` type, which embodies the parsed body and signatures.
pub fn parse_signed<
B: HasUnverifiedParsedBody,
S: NetdocParseableSignatures,
O: NetdocUnverified<Body = B, Signatures = S>,
>(
&mut self,
outer_stop: stop_at!(),
) -> Result<O, EP> {
// Work on a copy of ourselves whose `whole_input` starts at the *current*
// position: within the sub-parse, `byte_position` and
// `body_sofar_for_signature` then cover only the sub-document.
let mut input = ItemStream {
whole_input: &self.whole_input[self.whole_input.len() - self.lines.remaining().len()..],
..self.clone()
};
let r = (|| {
let inner_always_stop = outer_stop | StopAt::doc_intro::<B::UnverifiedParsedBody>();
let body = B::UnverifiedParsedBody::from_items(
&mut input,
inner_always_stop | StopAt(S::is_item_keyword),
)?;
// The signed body is everything parsed so far, before the signatures.
let signed_doc_body = input.body_sofar_for_signature();
let unsigned_body_len = signed_doc_body.body().len();
let mut hashes = S::HashesAccu::default();
let sigs = S::from_items(&mut input, signed_doc_body, &mut hashes, inner_always_stop)?;
let sigs = SignaturesData {
sigs,
unsigned_body_len,
hashes,
};
// SECURITY
// We unwrap the UnverifiedParsedBody and immediately wrap it up again
// in FooUnverified, passing on the obligation to verify the signatures,
// and still enforcing that with a newtype.
let signed = O::from_parts(B::unverified_into_inner_unchecked(body), sigs);
Ok(signed)
})(); // don't exit here
// Adopt the sub-parse's progress (even on error), but restore our own
// `whole_input` so our byte offsets remain relative to the whole document.
*self = ItemStream {
whole_input: self.whole_input,
..input
};
r
}
/// Obtain the inputs that would be needed to hash any (even Disorderly) signature
///
/// These are the hash inputs which would be needed for the next item,
/// assuming it's a signature keyword.
///
/// Returns `Ok(None)` at end of input.
pub fn peek_signature_hash_inputs(
&mut self,
body: SignedDocumentBody<'s>,
) -> Result<Option<SignatureHashInputs<'s>>, EP> {
self.peek_internal()?;
let PeekState::Some(peeked) = &self.peeked else {
return Ok(None);
};
let document_sofar = self.body_sofar_for_signature().body();
let signature_item_line = self.lines.peeked_line(&peeked.line);
// Dropping the arguments' length from the end leaves the keyword
// plus the whitespace which terminated it.
let signature_item_kw_spc = signature_item_line.strip_end_counted(peeked.args_len);
Ok(Some(SignatureHashInputs {
body,
document_sofar,
signature_item_kw_spc,
signature_item_line,
}))
}
/// Yield the next item.
///
/// Returns `Ok(None)` at end of input.
pub fn next_item(&mut self) -> Result<Option<UnparsedItem<'s>>, EP> {
self.peek_internal()?;
// Take the peeked state, replacing it with `None` that records the line
// number of the item we are about to yield (for `lno_for_error`).
let peeked = match self.peeked {
PeekState::None { .. } => return Ok(None),
PeekState::Some { .. } => match mem::replace(
&mut self.peeked,
PeekState::None {
yielded_item_lno: self.lines.peek_lno(),
},
) {
PeekState::Some(peeked) => peeked,
PeekState::None { .. } => panic!("it was Some just now"),
},
};
let keyword = peeked.keyword;
let line = self.lines.consume_peeked(peeked.line);
// The arguments are everything after the keyword, starting with the
// whitespace that terminated it (ArgumentStream trims leading WS).
let args = &line[keyword.len()..];
let options = self.options;
let args = ArgumentStream::new(args, line.len(), options);
// An Object (PEM-like block) follows iff the next line starts with `-`.
let object = if self.lines.remaining().starts_with('-') {
// Parses one PEM delimiter line, returning the label text
// between `start` and `PEM_AFTER_LABEL`.
fn pem_delimiter<'s>(lines: &mut Lines<'s>, start: &str) -> Result<&'s str, EP> {
let line = lines.next().ok_or(
// If this is the *header*, we already know there's a line,
// so this error path is only for footers.
EP::ObjectMissingFooter,
)?;
let label = line
.strip_prefix(start)
.ok_or(EP::InvalidObjectDelimiters)?
.strip_suffix(PEM_AFTER_LABEL)
.ok_or(EP::InvalidObjectDelimiters)?;
Ok(label)
}
let label1 = pem_delimiter(&mut self.lines, PEM_HEADER_START)?;
let base64_start_remaining = self.lines.remaining();
// Skip the base64 data lines: everything up to the footer,
// which (like the header) starts with `-`.
while !self.lines.remaining().starts_with('-') {
let _: &str = self.lines.next().ok_or(EP::ObjectMissingFooter)?;
}
// The data is the input between the header and the footer.
let data_b64 = base64_start_remaining.strip_end_counted(self.lines.remaining().len());
let label2 = pem_delimiter(&mut self.lines, PEM_FOOTER_START)?;
// Header and footer labels must agree.
let label = [label1, label2]
.into_iter()
.all_equal_value()
.map_err(|_| EP::ObjectMismatchedLabels)?;
Some(UnparsedObject {
label,
data_b64,
options,
})
} else {
None
};
Ok(Some(UnparsedItem {
keyword,
args,
object,
}))
}
}
impl<'s> UnparsedItem<'s> {
    /// Mutable access to the arguments, for consuming and parsing them
    pub fn args_mut(&mut self) -> &mut ArgumentStream<'s> {
        &mut self.args
    }
    /// Obtain a clone of the argument stream
    ///
    /// Take care not to process any argument more than once.
    pub fn args_copy(&self) -> ArgumentStream<'s> {
        self.args.clone()
    }
    /// Shared (readonly) access to the arguments
    ///
    /// Take care not to process any argument more than once.
    pub fn args(&self) -> &ArgumentStream<'s> {
        &self.args
    }
    /// Ensure that this item carries no Object.
    pub fn check_no_object(&self) -> Result<(), EP> {
        match self.object {
            Some(_) => Err(EP::ObjectUnexpected),
            None => Ok(()),
        }
    }
    /// Convenience method for handling an error parsing an argument
    ///
    /// Returns a closure that converts every error into [`ArgumentError::Invalid`]
    /// and then to an [`ErrorProblem`] using
    /// [`.args().handle_error()`](ArgumentStream::handle_error).
    ///
    /// Useful in manual `ItemValueParseable` impls, when parsing arguments ad-hoc.
    pub fn invalid_argument_handler<E>(
        &self,
        field: &'static str,
    ) -> impl FnOnce(E) -> ErrorProblem {
        // Capture the converted error now; the closure discards the cause.
        let problem = self.args().handle_error(field, AE::Invalid);
        move |_ignored_error| problem
    }
}
/// End of an argument list that does not accept any further (unknown) arguments
///
/// Implements `ItemArgumentParseable`. Parses successfully iff the argument list is empty.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
#[allow(clippy::exhaustive_structs)]
pub struct NoFurtherArguments;
impl ItemArgumentParseable for NoFurtherArguments {
    // Succeeds iff no arguments remain; a leftover argument becomes an error.
    fn from_args(args: &mut ArgumentStream) -> Result<Self, AE> {
        let token = args.reject_extra_args()?;
        Ok(token)
    }
}
impl<'s> Iterator for ItemStream<'s> {
    type Item = Result<UnparsedItem<'s>, EP>;
    fn next(&mut self) -> Option<Result<UnparsedItem<'s>, EP>> {
        // Hand-rolled equivalent of `self.next_item().transpose()`:
        // end of input becomes `None`; errors are yielded as stream items.
        match self.next_item() {
            Ok(None) => None,
            Ok(Some(item)) => Some(Ok(item)),
            Err(e) => Some(Err(e)),
        }
    }
}
impl<'s> ArgumentStream<'s> {
/// Make a new `ArgumentStream` from a string
///
/// The string may start with whitespace (which will be ignored).
pub fn new(rest: &'s str, whole_line_len: usize, options: &'s ParseOptions) -> Self {
// Before anything is yielded, `prev_arg_column` reports column 1
// (the start of the line).
let previous_rest_len = whole_line_len;
ArgumentStream {
rest,
whole_line_len,
previous_rest_len,
options,
}
}
/// Consume this whole `ArgumentStream`, giving the remaining arguments as a string
///
/// The returned string won't start with whitespace.
///
/// `self` will be empty on return.
//
// (We don't take `self` by value because that makes use with `UnparsedItem` annoying.)
pub fn into_remaining(&mut self) -> &'s str {
self.prep_yield();
mem::take(&mut self.rest)
}
/// The length of the whole original line these arguments came from
///
/// Used for converting remaining-lengths into column numbers.
pub fn whole_line_len(&self) -> usize {
self.whole_line_len
}
/// Prepares to yield an argument (or the rest)
///
/// * Trims leading WS from `rest`.
/// * Records the `previous_rest_len`
fn prep_yield(&mut self) {
self.rest = self.rest.trim_start_matches(WS);
self.previous_rest_len = self.rest.len();
}
/// Prepares to yield, and then determines if there *is* anything to yield.
///
/// * Trim leading whitespace
/// * Records the `previous_rest_len`
/// * See if we're now empty
pub fn something_to_yield(&mut self) -> bool {
self.prep_yield();
!self.rest.is_empty()
}
/// Throw an error if there are further arguments
//
// (We don't take `self` by value because that makes use with `UnparsedItem` annoying.)
pub fn reject_extra_args(&mut self) -> Result<NoFurtherArguments, UnexpectedArgument> {
if self.something_to_yield() {
let column = self.next_arg_column();
Err(UnexpectedArgument { column })
} else {
Ok(NoFurtherArguments)
}
}
/// Convert a "length of `rest`" into the corresponding column number.
///
/// Columns are 1-based.
fn arg_column_from_rest_len(&self, rest_len: usize) -> usize {
// Can't underflow since rest is always part of the whole.
// Can't overflow since that would mean the document was as big as the address space.
self.whole_line_len - rest_len + 1
}
/// Obtain the column number of the previously yielded argument.
///
/// (After `into_remaining`, gives the column number
/// of the start of the returned remaining argument string.)
pub fn prev_arg_column(&self) -> usize {
self.arg_column_from_rest_len(self.previous_rest_len)
}
/// Obtains the column number of the *next* argument.
///
/// Should be called after `something_to_yield`; otherwise the returned value
/// may point to whitespace which is going to be skipped.
// ^ this possible misuse doesn't seem worth defending against with type-fu,
// for a private function with few call sites.
fn next_arg_column(&self) -> usize {
self.arg_column_from_rest_len(self.rest.len())
}
/// Convert an `ArgumentError` to an `ErrorProblem`.
///
/// The caller must supply the field name.
pub fn handle_error(&self, field: &'static str, ae: ArgumentError) -> ErrorProblem {
self.error_handler(field)(ae)
}
/// Return a converter from `ArgumentError` to `ErrorProblem`.
///
/// Useful in `.map_err`.
pub fn error_handler(
&self,
field: &'static str,
) -> impl Fn(ArgumentError) -> ErrorProblem + 'static {
// Capture the column eagerly, so the closure reports the position of
// the most recently yielded argument and can be `'static`.
let column = self.prev_arg_column();
move |ae| match ae {
AE::Missing => EP::MissingArgument { field },
AE::Invalid => EP::InvalidArgument { field, column },
AE::Unexpected => EP::UnexpectedArgument { column },
}
}
}
impl<'s> Iterator for ArgumentStream<'s> {
    type Item = &'s str;
    fn next(&mut self) -> Option<&'s str> {
        // Trim leading WS and update the column bookkeeping;
        // if nothing is left, the argument list is exhausted.
        if self.something_to_yield() {
            // Split at the next whitespace; with no further whitespace,
            // the whole remainder is the final argument.
            let (arg, rest) = self.rest.split_once(WS).unwrap_or((self.rest, ""));
            self.rest = rest;
            Some(arg)
        } else {
            None
        }
    }
}
impl<'s> UnparsedObject<'s> {
/// Obtain the Object data, as decoded bytes
///
/// Decodes the stored multi-line base64 data.
/// Any decoding failure is reported as `EP::ObjectInvalidBase64`;
/// the underlying base64 error detail is discarded.
pub fn decode_data(&self) -> Result<Vec<u8>, EP> {
crate::parse::tokenize::base64_decode_multiline(self.data_b64)
.map_err(|_e| EP::ObjectInvalidBase64)
}
}