//! Based on a set of rules, validate a token stream and collect the
//! tokens by type.
//!
//! See the "rules" module for definitions of keywords types and
//! per-keyword rules.
//!
//! The key types in this module are SectionRules, which describes how to
//! validate and partition a stream of Item, and Section, which contains
//! a validated set of Item, ready to be interpreted.
//!
//! # Example
//!
//! (This is an internal API, so see the routerdesc.rs source for an
//! example of use.)
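//!
//! As a rough sketch, using the test-only `Fruit` keyword type from the
//! tests at the bottom of this file (`input` is a hypothetical document
//! string):
//!
//! ```ignore
//! let mut rules: SectionRules<Fruit> = SectionRules::new();
//! rules.add(Fruit::ORANGE.rule().required().args(1..));
//! rules.add(Fruit::STONEFRUIT.rule().may_repeat());
//!
//! let mut reader: NetDocReader<'_, Fruit> = NetDocReader::new(input);
//! let section = rules.parse(&mut reader.iter())?;
//! let orange = section.required(Fruit::ORANGE)?;
//! ```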

use crate::parse::keyword::Keyword;
use crate::parse::rules::*;
use crate::parse::tokenize::*;
use crate::{Error, Result};

/// Describe the rules for one section of a document.
///
/// The rules are represented as a mapping from token index to
/// rules::TokenFmt.
#[derive(Clone)]
pub(crate) struct SectionRules<T: Keyword> {
    /// A set of rules for decoding a series of tokens into a Section
    /// object.  Each element of this array corresponds to the
    /// keyword with the same index value.
    ///
    /// When an array element is None, the corresponding keyword is
    /// not allowed in this kind of section.  Otherwise, the array
    /// element is a TokenFmt describing how many instances of the
    /// corresponding token may appear, and what they need to look like.
    rules: Vec<Option<TokenFmt<T>>>,
}

/// The entry or entries for a particular keyword within a document.
#[derive(Clone)]
enum TokVal<'a, K: Keyword> {
    /// No value has been found.
    None,
    /// A single value has been found; we're storing it in place.
    ///
    /// We use a one-element array here so that we can return a slice
    /// of the array.
    Some([Item<'a, K>; 1]),
    /// Multiple values have been found; they go in a vector.
    Multi(Vec<Item<'a, K>>),
}
impl<'a, K: Keyword> TokVal<'a, K> {
    /// Return the number of Items for this value.
    fn count(&self) -> usize {
        match self {
            TokVal::None => 0,
            TokVal::Some(_) => 1,
            TokVal::Multi(v) => v.len(),
        }
    }
    /// Return the first Item for this value, or None if there wasn't one.
    fn first(&self) -> Option<&Item<'a, K>> {
        match self {
            TokVal::None => None,
            TokVal::Some([t]) => Some(t),
            TokVal::Multi(v) => Some(&v[0]),
        }
    }
    /// Return the Item for this value, if there is exactly one.
    fn singleton(&self) -> Option<&Item<'a, K>> {
        match self {
            TokVal::None => None,
            TokVal::Some([t]) => Some(t),
            TokVal::Multi(_) => None,
        }
    }
    /// Return all the Items for this value, as a slice.
    fn as_slice(&self) -> &[Item<'a, K>] {
        match self {
            TokVal::None => &[],
            TokVal::Some(t) => &t[..],
            TokVal::Multi(v) => &v[..],
        }
    }
    /// Return the last Item for this value, if any.
    fn last(&self) -> Option<&Item<'a, K>> {
        match self {
            TokVal::None => None,
            TokVal::Some([t]) => Some(t),
            TokVal::Multi(v) => Some(&v[v.len() - 1]),
        }
    }
}

/// A Section is the result of sorting a document's entries by keyword.
///
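/// As a rough sketch of typical lookups, using the test-only `Fruit`
/// keyword type from the tests at the bottom of this file:
///
/// ```ignore
/// let orange = section.required(Fruit::ORANGE)?;       // exactly one, or an error
/// let guava = section.get(Fruit::GUAVA);               // Some(_) only if exactly one
/// let stonefruit = section.slice(Fruit::STONEFRUIT);   // zero or more, as a slice
/// ```
///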
/// TODO: I'd rather have this be pub(crate), but I haven't figured out
/// how to make that work.
pub struct Section<'a, T: Keyword> {
    /// Map from Keyword index to TokVal
    v: Vec<TokVal<'a, T>>,
    /// The keyword that appeared first in this section.  This will
    /// be set if any item has been added to this section.
    first: Option<T>,
    /// The keyword that appeared last in this section.  This will
    /// be set if any item has been added to this section.
    last: Option<T>,
}

impl<'a, T: Keyword> Section<'a, T> {
    /// Make a new empty Section.
    fn new() -> Self {
        let n = T::n_vals();
        let mut v = Vec::with_capacity(n);
        v.resize(n, TokVal::None);
        Section {
            v,
            first: None,
            last: None,
        }
    }
    /// Helper: return the tokval for some Keyword.
    fn tokval(&self, t: T) -> &TokVal<'a, T> {
        let idx = t.idx();
        &self.v[idx]
    }
    /// Return all the Items for some Keyword, as a slice.
    pub(crate) fn slice(&self, t: T) -> &[Item<'a, T>] {
        self.tokval(t).as_slice()
    }
    /// Return a single Item for some Keyword, if there is exactly one.
    pub(crate) fn get(&self, t: T) -> Option<&Item<'a, T>> {
        self.tokval(t).singleton()
    }
    /// Return a single Item for some Keyword, giving an error if there
    /// is not exactly one.
    ///
    /// It is usually a mistake to use this function on a Keyword that is
    /// not required.
    pub(crate) fn required(&self, t: T) -> Result<&Item<'a, T>> {
        self.get(t).ok_or_else(|| Error::MissingToken(t.to_str()))
    }
    /// Return a proxy MaybeItem object for some keyword.
    ///
    /// A MaybeItem is used to represent an Item that might or might
    /// not be present in the section.
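    ///
    /// A rough sketch of use, with the test-only `Fruit` keyword type
    /// (the argument index here is illustrative):
    ///
    /// ```ignore
    /// // If GUAVA is absent, this yields Ok(None) rather than an error.
    /// let n_trees = section.maybe(Fruit::GUAVA).parse_arg::<u32>(2)?;
    /// ```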
    pub(crate) fn maybe<'b>(&'b self, t: T) -> MaybeItem<'b, 'a, T> {
        MaybeItem::from_option(self.get(t))
    }
    /// Return the first item that was accepted for this section, or None
    /// if no items were accepted for this section.
    pub(crate) fn first_item(&self) -> Option<&Item<'a, T>> {
        match self.first {
            None => None,
            Some(t) => self.tokval(t).first(),
        }
    }
    /// Return the last item that was accepted for this section, or None
    /// if no items were accepted for this section.
    pub(crate) fn last_item(&self) -> Option<&Item<'a, T>> {
        match self.last {
            None => None,
            Some(t) => self.tokval(t).last(),
        }
    }
    /// Insert an `item`.
    ///
    /// The `item`'s keyword must be `t`.
    fn add_tok(&mut self, t: T, item: Item<'a, T>) {
        let idx = Keyword::idx(t);
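        // Grow the array if needed, so that `idx` is in bounds.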
        if idx >= self.v.len() {
            self.v.resize(idx + 1, TokVal::None);
        }
        let m = &mut self.v[idx];

        match m {
            TokVal::None => *m = TokVal::Some([item]),
            TokVal::Some([x]) => {
                *m = TokVal::Multi(vec![x.clone(), item]);
            }
            TokVal::Multi(ref mut v) => {
                v.push(item);
            }
        };
        if self.first.is_none() {
            self.first = Some(t);
        }
        self.last = Some(t);
    }
}

impl<T: Keyword> SectionRules<T> {
    /// Create a new SectionRules with no rules.
    ///
    /// By default, no Keyword is allowed by this SectionRules.
    pub(crate) fn new() -> Self {
        let n = T::n_vals();
        let mut rules = Vec::with_capacity(n);
        rules.resize(n, None);
        SectionRules { rules }
    }

    /// Add a rule to this SectionRules, based on a TokenFmtBuilder.
    ///
    /// Requires that no rule yet exists for the provided keyword.
    pub(crate) fn add(&mut self, t: TokenFmtBuilder<T>) {
        let rule: TokenFmt<_> = t.into();
        let idx = rule.kwd().idx();
        assert!(self.rules[idx].is_none());
        self.rules[idx] = Some(rule);
    }

    /// Parse a stream of tokens into a Section object without (fully)
    /// verifying them.
    ///
    /// Some errors are detected early, but others only show up later
    /// when we validate more carefully.
    fn parse_unverified<'a, I>(&self, tokens: &mut I, section: &mut Section<'a, T>) -> Result<()>
    where
        I: Iterator<Item = Result<Item<'a, T>>>,
    {
        for item in tokens {
            let item = item?;

            let tok = item.kwd();
            let tok_idx = tok.idx();
            if let Some(rule) = &self.rules[tok_idx] {
                // We want this token.
                assert!(rule.kwd() == tok);
                section.add_tok(tok, item);
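                // Checking the multiplicity incrementally (not only in
                // validate() below) lets us reject an over-repeated token
                // as soon as the extra copy appears.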
                rule.check_multiplicity(section.v[tok_idx].as_slice())?;
            } else {
                // We don't have a rule for this token.
                return Err(Error::UnexpectedToken(tok.to_str(), item.pos()));
            }
        }
        Ok(())
    }

    /// Check whether the tokens in a section we've parsed conform to
    /// these rules.
    fn validate<'a>(&self, s: &Section<'a, T>) -> Result<()> {
        // These vectors are both generated from T::n_vals().
        assert_eq!(s.v.len(), self.rules.len());

        // Iterate over every item, and make sure it matches the
        // corresponding rule.
        for (rule, t) in self.rules.iter().zip(s.v.iter()) {
            match rule {
                None => {
                    // We aren't supposed to have any of these.
                    if t.count() > 0 {
                        unreachable!(
                            "This item should have been rejected earlier, in parse_unverified()"
                        );
                    }
                }
                Some(rule) => {
                    // We're allowed to have this. Is the number right?
                    rule.check_multiplicity(t.as_slice())?;
                    // The number is right. Check each individual item.
                    for item in t.as_slice() {
                        rule.check_item(item)?
                    }
                }
            }
        }

        Ok(())
    }

    /// Check all the base64-encoded objects on a given keyword.
    ///
    /// We use this to validate objects on unrecognized items, since
    /// otherwise nothing would check that they are well-formed.
    fn validate_objects<'a>(&self, s: &Section<'a, T>, kwd: T) -> Result<()> {
        for item in s.slice(kwd).iter() {
            let _ = item.obj_raw()?;
        }
        Ok(())
    }

    /// Parse a stream of tokens into a validated section.
    pub(crate) fn parse<'a, I>(&self, tokens: &mut I) -> Result<Section<'a, T>>
    where
        I: Iterator<Item = Result<Item<'a, T>>>,
    {
        let mut section = Section::new();
        self.parse_unverified(tokens, &mut section)?;
        self.validate(&section)?;
        self.validate_objects(&section, T::unrecognized())?;
        self.validate_objects(&section, T::ann_unrecognized())?;
        Ok(section)
    }
}

#[cfg(test)]
mod test {
    use super::SectionRules;
    use crate::parse::keyword::Keyword;
    use crate::parse::macros::test::Fruit;
    use crate::parse::tokenize::{Item, NetDocReader};
    use crate::{Error, Result};
    use once_cell::sync::Lazy;

    /// Rules for parsing a fruit salad test section.
    static FRUIT_SALAD: Lazy<SectionRules<Fruit>> = Lazy::new(|| {
        use Fruit::*;
        let mut rules = SectionRules::new();
        rules.add(ANN_TASTY.rule().required().args(1..=1));
        rules.add(ORANGE.rule().args(1..));
        rules.add(STONEFRUIT.rule().may_repeat());
        rules.add(GUAVA.rule().obj_optional());
        rules.add(LEMON.rule().no_args().obj_required());
        rules
    });

    #[test]
    fn parse_section() -> Result<()> {
        use Fruit::*;
        let s = "\
@tasty yes
orange soda
cherry cobbler
cherry pie
plum compote
guava fresh from 7 trees
-----BEGIN GUAVA MANIFESTO-----
VGhlIGd1YXZhIGVtb2ppIGlzIG5vdCBjdXJyZW50bHkgc3VwcG9ydGVkIGluI
HVuaWNvZGUgMTMuMC4gTGV0J3MgZmlnaHQgYWdhaW5zdCBhbnRpLWd1YXZhIG
JpYXMu
-----END GUAVA MANIFESTO-----
lemon
-----BEGIN LEMON-----
8J+Niw==
-----END LEMON-----
";
        let mut r: NetDocReader<'_, Fruit> = NetDocReader::new(s);
        let sec = FRUIT_SALAD.parse(&mut r.iter()).unwrap();

        assert_eq!(sec.required(ANN_TASTY)?.arg(0), Some("yes"));

        assert!(sec.get(ORANGE).is_some());
        assert_eq!(sec.get(ORANGE).unwrap().args_as_str(), "soda");

        let stonefruit_slice = sec.slice(STONEFRUIT);
        assert_eq!(stonefruit_slice.len(), 3);
        let kwds: Vec<&str> = stonefruit_slice.iter().map(Item::kwd_str).collect();
        assert_eq!(kwds, &["cherry", "cherry", "plum"]);

        assert_eq!(sec.maybe(GUAVA).args_as_str(), Some("fresh from 7 trees"));
        assert_eq!(sec.maybe(GUAVA).parse_arg::<u32>(2).unwrap(), Some(7));
        assert!(sec.maybe(GUAVA).parse_arg::<u32>(1).is_err());

        assert_eq!(sec.get(GUAVA).unwrap().obj("GUAVA MANIFESTO").unwrap(),
                   &b"The guava emoji is not currently supported in unicode 13.0. Let's fight against anti-guava bias."[..]);

        assert_eq!(
            sec.get(ANN_TASTY).unwrap() as *const Item<'_, _>,
            sec.first_item().unwrap() as *const Item<'_, _>
        );

        assert_eq!(
            sec.get(LEMON).unwrap() as *const Item<'_, _>,
            sec.last_item().unwrap() as *const Item<'_, _>
        );

        Ok(())
    }

    #[test]
    fn rejected() {
        use crate::Pos;
        fn check(s: &str, e: Error) {
            let mut r: NetDocReader<'_, Fruit> = NetDocReader::new(s);
            let res = FRUIT_SALAD.parse(&mut r.iter());
            assert!(res.is_err());
            assert_eq!(res.err().unwrap().within(s), e);
        }

        // Unrecognized tokens aren't allowed here.
        check(
            "orange foo\nfoobar x\n@tasty yes\n",
            Error::UnexpectedToken("<unrecognized>", Pos::from_line(2, 1)),
        );

        // Only one orange per customer.
        check(
            "@tasty yes\norange foo\norange bar\n",
            Error::DuplicateToken("orange", Pos::from_line(3, 1)),
        );

        // There needs to be a declaration of tastiness.
        check("orange foo\n", Error::MissingToken("@tasty"));

        // You can't have an orange without an argument.
        check(
            "@tasty nope\norange\n",
            Error::TooFewArguments("orange", Pos::from_line(2, 1)),
        );
        // You can't have more than one argument on "@tasty".
        check(
            "@tasty yup indeed\norange normal\n",
            Error::TooManyArguments("@tasty", Pos::from_line(1, 1)),
        );

        // Every lemon needs an object.
        check(
            "@tasty yes\nlemon\norange no\n",
            Error::MissingObject("lemon", Pos::from_line(2, 1)),
        );
    }
}