cddl_cat/parser/
mod.rs

1//! This module contains a CDDL parser.
2//!
3//! The only public items here are the function [`parse_cddl`] and the error
4//! [`ParseError`] and its child enum [`ErrorKind`].
5//!
6//! # Examples
7//! ```
8//! use cddl_cat::parse_cddl;
9//!
10//! let input = "map = { name: tstr }";
11//! assert!(parse_cddl(input).is_ok());
12//! ```
13
14use escape8259::unescape;
15use nom::{
16    branch::alt,
17    bytes::complete::{tag, take_while1},
18    character::complete::{
19        anychar, char as charx, digit0, digit1, hex_digit1, multispace1, not_line_ending, one_of,
20    },
21    combinator::{all_consuming, map, map_res, opt, recognize, value as valuex},
22    multi::{many0, many1, separated_list1},
23    sequence::{delimited, pair, preceded, separated_pair, terminated, tuple},
24};
25use std::convert::TryFrom;
26
27use crate::ast::*;
28use parse_err::{parse_error, CowParseError};
29
30mod parse_err;
31pub use parse_err::{ErrorKind, ParseError};
32
33//
34// A note on the design of the parser:
35//
// Parsers built from the "nom" crate look a bit different from most other
// Rust code; for extra readability there are a lot of extra indents that
// rustfmt wouldn't like (and rustfmt::skip is applied extensively.)
39//
40
// The result type used everywhere in this parser.
//
// It is nom's `IResult` with the error type fixed to `CowParseError`.  This
// allows me to mix locally-generated custom errors with the errors that
// are automatically generated by the parser.
type JResult<'a, I, O> = nom::IResult<I, O, CowParseError<'a>>;
45
46// A workaround for the fact that nom::combinator::map_res discards the returned error type.
47// See also https://github.com/Geal/nom/issues/1171
48fn map_res_fail<I: Clone, O1, O2, E: nom::error::ParseError<I>, F, G>(
49    mut first: F,
50    second: G,
51) -> impl FnMut(I) -> nom::IResult<I, O2, E>
52where
53    F: FnMut(I) -> nom::IResult<I, O1, E>,
54    G: Fn(O1) -> Result<O2, E>,
55{
56    move |input: I| {
57        let (input, o1) = first(input)?;
58        match second(o1) {
59            Ok(o2) => Ok((input, o2)),
60            Err(e) => Err(nom::Err::Failure(e)),
61        }
62    }
63}
64
65// CDDL whitespace definition:
66// Note no support for tabs, or naked linefeed characters.
67//
68// S = *WS
69// WS = SP / NL
70// SP = %x20
71// NL = COMMENT / CRLF
72// COMMENT = ";" *PCHAR CRLF
73// PCHAR = %x20-7E / %x80-10FFFD
74// CRLF = %x0A / %x0D.0A
75
76// This varies a bit from the RFC, again, with respect to whitespace.
77#[rustfmt::skip]
78fn comment(input: &str) -> JResult<&str, &str> {
79    // semicolon, anything, terminated by CR or CR+LF.
80    preceded(
81        charx(';'),
82        not_line_ending
83    )(input)
84}
85
86// Any amount of whitespace (including none) (including comments)
87// Note: this counts tab characters as whitespace, which differs from RFC8610.
88#[rustfmt::skip]
89fn ws(input: &str) -> JResult<&str, &str> {
90    recognize(
91        many0(
92            alt((
93                // multispace1 includes tabs
94                multispace1,
95                comment
96            ))
97        )
98    )
99    (input)
100}
101
102// An optional comma and any whitespace surrounding it.
103#[rustfmt::skip]
104fn optcom(input: &str) -> JResult<&str, ()> {
105    valuex((), pair(
106        ws,
107        opt(pair(
108            tag(","),
109            ws
110        ))
111    ))
112    (input)
113}
114
115// id = EALPHA *(*("-" / ".") (EALPHA / DIGIT))
116// So the first character needs to be EALPHA (alpha plus @_$)
117// and then any number of EALPHA or DIGIT or "-" or ".",
118// ending with EALPHA or DIGIT.
119// we'll take this in steps:
120// 1. first EALPHA char
121// 2. do the following zero or more times:
122//    a. optionally consume one of -.
123//    b. consume EALPHA or DIGIT.
124
// EALPHA: an ASCII letter, or one of the characters '@', '_' or '$'.
fn ealpha_char(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '@' | '_' | '$')
}
128
129#[rustfmt::skip]
130fn ealpha(input: &str) -> JResult<&str, &str> {
131    take_while1(ealpha_char)
132    (input)
133}
134
135// The tail characters of an ident: *("-" / ".") (EALPHA / DIGIT)
136#[rustfmt::skip]
137fn ident_tail(input: &str) -> JResult<&str, &str> {
138    recognize(
139        preceded(
140            opt(many1(one_of("-."))),
141            alt((
142                ealpha,
143                digit1
144            ))
145        )
146    )
147    (input)
148}
149
150#[rustfmt::skip]
151fn ident(input: &str) -> JResult<&str, &str> {
152    recognize(
153        preceded(
154            ealpha,
155            many0(ident_tail)
156        )
157    )
158    (input)
159}
160
161// uint = DIGIT1 *DIGIT
162//      / "0x" 1*HEXDIG
163//      / "0b" 1*BINDIG
164//      / "0"
165#[rustfmt::skip]
166fn uint_hex(input: &str) -> JResult<&str, &str> {
167    preceded(
168        tag("0x"),
169        hex_digit1
170    )
171    (input)
172}
173
174#[rustfmt::skip]
175fn uint_binary(input: &str) -> JResult<&str, &str> {
176    preceded(
177        tag("0b"),
178        recognize(many1(one_of("01")))
179    )
180    (input)
181}
182
183#[rustfmt::skip]
184fn uint_decimal(input: &str) -> JResult<&str, &str> {
185    alt((
186        tag("0"),
187        recognize(
188            pair(
189                one_of("123456789"),
190                digit0
191            )
192        )
193    ))
194    (input)
195}
196
// Parsing of integers, floats, etc. can get pretty complicated.  Because
// different downstream parsers may want the integer in different forms, we
// store its information in a temporary struct that preserves the original
// string slice along with some context that will help us remember what that
// string represents.
struct RawUint<'a> {
    // The digits as they appeared in the input, without any "0x" or "0b"
    // prefix.
    slice: &'a str,
    // The radix the digits are written in (16, 2 or 10).
    base: u32,
}
206
207// Parse the string for uint; return the slice and the intended base (radix).
208#[rustfmt::skip]
209fn uint(input: &str) -> JResult<&str, RawUint> {
210    alt((
211        map(uint_hex, |slice| RawUint{slice, base: 16}),
212        map(uint_binary, |slice| RawUint{slice, base: 2}),
213        map(uint_decimal, |slice| {
214            RawUint{slice, base: 10}
215        }),
216    ))
217    (input)
218}
219
220// Extract an unsigned integer using the correct base (radix).
221#[rustfmt::skip]
222fn uint_u64(input: &str) -> JResult<&str, u64> {
223    map_res_fail(uint, |raw| {
224        u64::from_str_radix(raw.slice, raw.base)
225        .map_err(|_| {
226            parse_error(ErrorKind::MalformedInteger, raw.slice)
227        })
228    })
229    (input)
230}
231
// Like RawUint, this is a temporary representation so we can
// preserve both the string slice and some metadata about it.
struct RawInt<'a> {
    // The digits as they appeared in the input; the '-' sign and any
    // "0x" / "0b" prefix are not part of this slice.
    slice: &'a str,
    // The radix the digits are written in (16, 2 or 10).
    base: u32,
    // True when the number was written with a leading '-'.
    neg: bool,
}
239
240// A signed integer
241#[rustfmt::skip]
242fn int(input: &str) -> JResult<&str, RawInt> {
243    let f = pair(
244        opt(charx('-')),
245        uint
246    );
247    map(f, |(optneg, rawuint)| {
248        RawInt {
249            slice: rawuint.slice,
250            base: rawuint.base,
251            neg: optneg.is_some(),
252        }
253    })
254    (input)
255}
256
257// "." fraction
258// fraction = 1*DIGIT
259#[rustfmt::skip]
260fn dot_fraction(input: &str) -> JResult<&str, &str> {
261    recognize(
262        pair(
263            charx('.'),
264            digit1
265        )
266    )
267    (input)
268}
269
270// "e" exponent
271// exponent = ["+"/"-"] 1*DIGIT
272#[rustfmt::skip]
273fn e_exponent(input: &str) -> JResult<&str, &str> {
274    recognize(
275        tuple((
276            charx('e'),
277            opt(one_of("+-")),
278            digit1
279        ))
280    )
281    (input)
282}
283
284// A helper function for converting string -> Value::Float,
285// and mapping to the right error type
286#[rustfmt::skip]
287fn parse_float(s: &str) -> Result<Value, CowParseError> {
288    match s.parse::<f64>() {
289        Ok(fl) => Ok(Value::Float(fl)),
290        Err(_) => Err(parse_error(ErrorKind::MalformedFloat, s)),
291    }
292}
293
294// A helper function for converting RawInt -> Value::Xint,
295// and mapping to the right error type
296fn parse_int(raw: RawInt) -> Result<Value, CowParseError> {
297    // Note: the string slice doesn't contain the '-' character, so we
298    // need to handle that ourselves.
299    let posint = u64::from_str_radix(raw.slice, raw.base)
300        .map_err(|_| parse_error(ErrorKind::MalformedInteger, raw.slice))?;
301
302    if raw.neg {
303        // i64 has a larger positive range than negative range, so if we
304        // survive the conversion to i64 then unary `-` must succeed.
305        let negint: i64 = try_into_int(posint, raw.slice)?;
306        Ok(Value::Nint(-negint))
307    } else {
308        Ok(Value::Uint(posint))
309    }
310}
311
// The distance in bytes from the start of `whole` to the start of `part`.
//
// A rough approximation of nom's offset() function, allowing us to
// do something like nom's recognize() without losing the inner types.
// The result is only meaningful when `part` is a sub-slice of `whole`.
fn offset(whole: &str, part: &str) -> usize {
    let whole_start = whole.as_ptr() as usize;
    let part_start = part.as_ptr() as usize;
    part_start - whole_start
}
317
318// This is similar to nom's `recognize` function.
319// The difference is that it doesn't throw away the inner parser's result;
320// it returns a tuple (slice, result) so you can have both.
321fn recognizer<'a, O, E, F>(
322    mut parser: F,
323) -> impl FnMut(&'a str) -> nom::IResult<&'a str, (&'a str, O), E>
324where
325    E: nom::error::ParseError<&'a str>,
326    F: FnMut(&'a str) -> nom::IResult<&'a str, O, E>,
327{
328    move |input: &'a str| match parser(input) {
329        Ok((i, output)) => {
330            let index = offset(input, i);
331            let output_slice = &input[..index];
332            let output_tuple = (output_slice, output);
333            Ok((i, output_tuple))
334        }
335        Err(e) => Err(e),
336    }
337}
338
339// int ["." fraction] ["e" exponent ]
340// (must have at least one of the latter two to be a float)
341fn float_or_int(input: &str) -> JResult<&str, Value> {
342    let f = recognizer(tuple((int, opt(dot_fraction), opt(e_exponent))));
343    map_res_fail(f, |(recognized, output)| {
344        let (firstint, frac, exp) = output;
345
346        // If we picked up a '.' or 'e' we are parsing a float; if neither we
347        // are parsing an integer.
348        let dot_or_e = frac.is_some() || exp.is_some();
349        if dot_or_e {
350            parse_float(recognized)
351        } else {
352            parse_int(firstint)
353        }
354    })(input)
355}
356
357// bytes = [bsqual] %x27 *BCHAR %x27
358// BCHAR = %x20-26 / %x28-5B / %x5D-10FFFD / SESC / CRLF
359// bsqual = "h" / "b64"
360//
361// Byte strings can come in 3 forms:
362// - 'abc\n' <= utf8 string interpreted as a byte string (and escaping is allowed)
363// - h'1234' or h'12 34' <= hex (base16) bytes; all whitespace is ignored.
// - b64'SGVsbG8gd29ybGQ=' <= base64 encoded byte string.
365// Also, byte strings can be concatenated, i.e. 'Hello ' 'world' == 'Hello world'.
366// See the RFC for details.
367
// Is this character allowed to appear unescaped inside a byte string?
//
// Accepted ranges: %x20-26 / %x28-5B / %x5D-7E / %x80-10FFFD
// That is, everything from space up to U+10FFFD except the single quote
// (0x27), the backslash (0x5C) and DEL (0x7F).
// NOTE(review): the BCHAR rule quoted above (%x5D-10FFFD) would include
// 0x7F; it is still excluded here to keep the existing behavior.
#[rustfmt::skip]
fn is_unescaped_bchar(c: char) -> bool {
    matches!(
        c as u32,
        0x20 ..= 0x26
        | 0x28 ..= 0x5B
        | 0x5D ..= 0x7E
        // The upper bound is U+10FFFD (three F's), as in the grammar.
        | 0x80 ..= 0x10FFFD
    )
}
380
381// One or more unescaped byte-string characters
382#[rustfmt::skip]
383fn unescaped_bchar(input: &str) -> JResult<&str, &str> {
384    take_while1(is_unescaped_bchar)
385    (input)
386}
387
388// FIXME: should also permit included CRLF
389// Zero or more byte-string characters
390#[rustfmt::skip]
391fn bchar(input: &str) -> JResult<&str, &str> {
392    recognize(
393        many0(
394            alt((
395                unescaped_bchar,
396                sesc
397            ))
398        )
399    )
400    (input)
401}
402
403// This is basically identical to `text_literal` except that
404// it uses single-quotes.
405#[rustfmt::skip]
406fn bytestring_utf8(input: &str) -> JResult<&str, String> {
407    let f = delimited(
408        tag("'"),
409        bchar,
410        tag("'")
411    );
412
413    map_res_fail(f, |s| {
414        unescape(s).map_err(|_| parse_error(ErrorKind::MalformedText, s) )
415    })
416    (input)
417}
418
419// This doesn't check that only the right characters are used;
420// that will be done later during parsing of the string.
421#[rustfmt::skip]
422fn bytestring_hex(input: &str) -> JResult<&str, &str> {
423    delimited(
424        tag("h'"),
425        bchar,
426        tag("\'")
427    )(input)
428}
429
430// This doesn't check that only the right characters are used;
431// that will be done later during parsing of the string.
432#[rustfmt::skip]
433fn bytestring_base64(input: &str) -> JResult<&str, &str> {
434    delimited(
435        tag("b64'"),
436        bchar,
437        charx('\'')
438    )(input)
439}
440
441// A helper function for parsing hex digits to bytes, while
442// ignoring whitespace and mapping to the right error type.
443fn parse_hex(s: &str) -> Result<Vec<u8>, CowParseError> {
444    // strip whitespace
445    // FIXME: this consumes more chars than the RFC says we should.
446    let s: String = s.chars().filter(|c| !c.is_ascii_whitespace()).collect();
447
448    hex::decode(&s).map_err(|_| parse_error(ErrorKind::MalformedHex, s))
449}
450
451#[rustfmt::skip]
452fn bytestring(input: &str) -> JResult<&str, Vec<u8>> {
453    alt((
454        map(bytestring_utf8, |s| s.as_bytes().into()),
455        map_res_fail(bytestring_hex, parse_hex),
456        map_res_fail(bytestring_base64, |s| {
457            base64::decode_config(s, base64::URL_SAFE).map_err(|_| {
458                parse_error(ErrorKind::MalformedBase64, s)
459            })
460        }),
461    ))
462    (input)
463}
464
465// text = %x22 *SCHAR %x22
466// SCHAR = %x20-21 / %x23-5B / %x5D-7E / %x80-10FFFD / SESC
467// SESC = "\" (%x20-7E / %x80-10FFFD)
468
// Is this character allowed to appear unescaped inside a text string?
//
// This is the SCHAR rule without its SESC alternative:
//   %x20-21 / %x23-5B / %x5D-7E / %x80-10FFFD
// That is, everything from space up to U+10FFFD except the double quote
// (0x22), the backslash (0x5C) and DEL (0x7F).
#[rustfmt::skip]
fn is_unescaped_schar(c: char) -> bool {
    matches!(
        c as u32,
        0x20 ..= 0x21
        | 0x23 ..= 0x5B
        | 0x5D ..= 0x7E
        // The upper bound is U+10FFFD (three F's), as in the grammar.
        | 0x80 ..= 0x10FFFD
    )
}
481
482// One or more unescaped text characters
483#[rustfmt::skip]
484fn unescaped_schar(input: &str) -> JResult<&str, &str> {
485    take_while1(is_unescaped_schar)
486    (input)
487}
488
489// A single escaped character
490#[rustfmt::skip]
491fn sesc(input: &str) -> JResult<&str, &str> {
492    // FIXME: allow only (%x20-7E / %x80-10FFFD)
493    preceded(charx('\\'), recognize(anychar))
494    (input)
495}
496
497// Zero or more text characters
498#[rustfmt::skip]
499fn schar(input: &str) -> JResult<&str, &str> {
500    recognize(
501        many0(
502            alt((
503                unescaped_schar,
504                sesc
505            ))
506        )
507    )
508    (input)
509}
510
511#[rustfmt::skip]
512fn text_literal(input: &str) -> JResult<&str, String> {
513    let f = delimited(
514        charx('"'),
515        schar,
516        charx('"')
517    );
518
519    map_res_fail(f, |s| {
520        unescape(s).map_err(|_| parse_error(ErrorKind::MalformedText, s) )
521    })
522    (input)
523}
524
525// value = number / text / bytes
526// number = hexfloat / (int ["." fraction] ["e" exponent ])
527#[rustfmt::skip]
528fn value(input: &str) -> JResult<&str, Value> {
529    alt((
530        float_or_int,
531        map(text_literal, Value::Text),
532        map(bytestring, Value::Bytes),
533    ))(input)
534}
535
536// Match the ": Y" part of an "X : Y" memberkey.
537#[rustfmt::skip]
538fn grpent_memberkey_tail(input: &str) -> JResult<&str, Type> {
539    preceded(
540        pair(
541            tag(":"),
542            ws,
543        ),
544        ty,
545    )(input)
546}
547
548// A helper function for grpent_member for assembling the Member
549// from the key and value when using the "X:Y" syntax.
550//
551// The key must be a Type2::Value or Type2::Typename or this function
552// will panic.
553fn assemble_basic_member(key: Type1, value: Type) -> Result<Member, CowParseError<'static>> {
554    let member_key = match key {
555        Type1::Simple(Type2::Value(v)) => MemberKeyVal::Value(v),
556        Type1::Simple(Type2::Typename(s)) => {
557            // Because we used the "key:value" syntax, the typename may
558            // only be a plain string, without generic parameters.
559            // If generic parameters are desired, the "key=>value" syntax
560            // would be required (and we would not have arrived here).
561
562            if !s.generic_args.is_empty() {
563                return Err(parse_error(
564                    ErrorKind::Unparseable,
565                    "Bareword with generic arguments",
566                ));
567            }
568            MemberKeyVal::Bareword(s.name)
569        }
570        _ => panic!("assemble_basic_member wrong key type"),
571    };
572    Ok(Member {
573        key: Some(MemberKey {
574            val: member_key,
575            cut: true,
576        }),
577        value,
578    })
579}
580
581// grpent = [occur S] [memberkey S] type
582//        / [occur S] groupname [genericarg]  ; preempted by above
583//        / [occur S] "(" S group S ")"
584//
585// memberkey = type1 S ["^" S] "=>"
586//           / bareword S ":"
587//           / value S ":"
588//
589// Parsing the memberkey syntax is tricky, because it's easy to fall into
590// 2^N iterations due to backtracking.
591//
592// The problem arises when we see something like "[[[[ int ]]]]".
593// If we first search for a memberkey, we can go on an adventure trying
594// many different partial matches of the first memberkey type
595// ( type1 S ["^" S] "=>" ) before discarding that work because there is
596// no trailing "=>".
597//
598// The root problem is that "int" will match both the with-memberkey and
599// without-memberkey grammars, because it's both a type and a type1.
600// if we start wrapping it in N sets of array-brackets or map-brackets,
601// there are 2^N possible ways for that type1 to be parsed before we
602// discover the missing "=>".
603//
604// The solution is to change the parser to match this equivalent grammar:
605//
606// grpent = [occur S] member_type1
607//        / [occur S] groupname [genericarg]  ; preempted by above
608//        / [occur S] "(" S group S ")"
609//
610// member_type1 = bareword S ":" type
611//              / value S ":" type
612//              / type1 S ["^" S] "=>" type
613//
614// In this grammar, it's easier to see that the bareword and value nodes
615// also match the type1 node.  We avoid backtracking by matching the
616// type1 node first, and then peeking at which variant it is to see if
617// the ":" syntax is allowed.  If it's an id (bareword) or value, then
618// we attempt to match that syntax before trying the others.
619//
// Parse a group entry that starts with a type1, trying in this order:
//   1. "X : Y"      (only when X is a plain value or typename)
//   2. "X [^] => Y"
//   3. "X / X2 / ..." with no memberkey at all
// The leading type1 is parsed exactly once and then reused by whichever
// form matches.
#[rustfmt::skip]
fn grpent_member(input: &str) -> JResult<&str, Member> {

    // The leading Type1 is required for this parser.
    let (input, first_type1) = terminated(type1, ws)(input)?;

    // If the type1 that matched is a plain value or typename (aka id), then
    // we may be looking at a memberkey followed by a ":".  That syntax isn't
    // allowed for other Type1 patterns.

    match first_type1 {
        Type1::Simple(Type2::Value(_)) |
        Type1::Simple(Type2::Typename(_)) => {
            // Next, try to match ":" ws ty
            // NOTE(review): `if let Ok` discards every kind of error here,
            // including nom::Err::Failure from deeper in `ty`; parsing then
            // falls through to the other forms below.  Confirm whether a
            // Failure should be propagated instead.
            if let Ok((input, tail_type)) = grpent_memberkey_tail(input) {
                let member = assemble_basic_member(first_type1, tail_type);
                match member {
                    Ok(member) => return Ok((input, member)),
                    // A bad key is a hard failure, not a backtrack.
                    Err(e) => return Err(nom::Err::Failure(e)),
                }

            }
        }
        // "X:Y" isn't allowed when X is some other Type1 variant.
        _ => {}
    }

    // grpent with memberkey followed by "=>" (with optional cut symbol "^")
    // should return a Member, with key: Some(MemberKey{val, cut})
    // and value: Type

    if let Ok((input, matched_tuple)) = tuple((
        opt(terminated(tag("^"), ws)),
        tag("=>"),
        ws,
        ty,
    ))(input) {
        let (cut, _, _, val) = matched_tuple;
        let member = Member {
            key: Some(MemberKey {
                val: MemberKeyVal::Type1(first_type1),
                cut: cut.is_some(),
            }),
            value: val,
        };
        return Ok((input, member));
    }

    // grpent without memberkey
    // should return a Member, with key: None and value: Type containing Vec<Type1>
    // This is a lot like the fn `ty` but we will concatenate this with first_type1

    if let Ok((input, mut ty1s)) = many0(
        preceded(
            delimited(ws, tag("/"), ws),
            type1
        )
    )(input) {
        // insert the first type1 (from the top of this function)
        ty1s.insert(0, first_type1);
        let member = Member {
            key: None,
            value: Type(ty1s),
        };
        return Ok((input, member));
    }

    // Reached only when the many0 above itself returned an error.
    Err(nom::Err::Error(parse_error(ErrorKind::Unparseable, "grpent_member")))
}
689
690#[rustfmt::skip]
691fn grpent_parens(input: &str) -> JResult<&str, Group> {
692    delimited(
693        charx('('),
694        delimited(
695            ws,
696            group,
697            ws,
698        ),
699        charx(')')
700    )(input)
701}
702
703#[rustfmt::skip]
704fn grpent_val(input: &str) -> JResult<&str, GrpEntVal> {
705    alt((
706        map(grpent_member, GrpEntVal::Member),
707        map(ident, |s| GrpEntVal::Groupname(s.into())),
708        map(grpent_parens, GrpEntVal::Parenthesized),
709    ))
710    (input)
711}
712
713// A helper function that does u64->usize conversion, returning
714// CowParseError(MalformedInteger) on failure.
715fn try_into_int<T, U>(x: T, source: &str) -> Result<U, CowParseError>
716where
717    U: TryFrom<T>,
718{
719    <U>::try_from(x).map_err(|_| parse_error(ErrorKind::MalformedInteger, source))
720}
721
722// occur = [uint] "*" [uint]
723//       / "+"
724//       / "?"
725#[rustfmt::skip]
726fn occur_star(input: &str) -> JResult<&str, Occur> {
727    let f = tuple((
728        opt(uint_u64),
729        tag("*"),
730        opt(uint_u64),
731    ));
732    // FIXME: it's really not the parser's business to be inventing an upper
733    // limit here.  Plus, the use of usize::MAX is kind of gross.
734    // The parser should leave these as Option and leave it to others to
735    // decide what to do with that.
736    map_res(f, |tup| -> Result<Occur, CowParseError> {
737        if tup.0.is_none() && tup.2.is_none() {
738            Ok(Occur::ZeroOrMore)
739        } else {
740            let lower: usize = match tup.0 {
741                Some(n) => try_into_int(n, input)?,
742                None => 0,
743            };
744            let upper: usize = match tup.2 {
745                Some(n) => try_into_int(n, input)?,
746                None => std::usize::MAX,
747            };
748            Ok(Occur::Numbered(lower, upper))
749        }
750    })
751    (input)
752}
753
754#[rustfmt::skip]
755fn occur(input: &str) -> JResult<&str, Occur> {
756    alt((
757        occur_star,
758        valuex(Occur::OneOrMore, tag("+")),
759        valuex(Occur::Optional, tag("?"))
760    ))
761    (input)
762}
763
764// grpent = [occur S] [memberkey S] type
765//        / [occur S] groupname [genericarg]  ; preempted by above
766//        / [occur S] "(" S group S ")"
767
768#[rustfmt::skip]
769fn grpent(input: &str) -> JResult<&str, GrpEnt> {
770    let f = pair(
771        opt(terminated(occur, ws)),
772        grpent_val
773    );
774    map(f, |(occur, val)| GrpEnt{ occur, val } )
775    (input)
776}
777
778// grpchoice = zero-or-more "grpent optional-comma"
779#[rustfmt::skip]
780fn grpchoice(input: &str) -> JResult<&str, GrpChoice> {
781    let f = many0(
782        terminated(grpent, optcom)
783    );
784    map(f, GrpChoice)
785    (input)
786}
787
788// group = grpchoice *(S "//" S grpchoice)
789#[rustfmt::skip]
790fn group(input: &str) -> JResult<&str, Group> {
791
792    // It would have been great to write this as
793    //  separated_list1(
794    //      tag("//"),
795    //      grpchoice)
796    // but separated_list1 returns an error if the
797    // list-item succeeds on "", which grpchoice does.
798
799    let f = pair(
800        grpchoice,
801        many0(preceded(
802            delimited(
803                ws,
804                tag("//"),
805                ws,
806            ),
807            grpchoice
808        ))
809    );
810
811    map(f, |(first, mut rest)| {
812        // Build a new vector containing all the grpchoice elements.
813        let mut gcs = vec![first];
814        gcs.append(&mut rest);
815        Group(gcs)
816    })(input)
817}
818
819// "(" S type S ")"
820#[rustfmt::skip]
821fn type2_parens(input: &str) -> JResult<&str, Type> {
822    delimited(
823        charx('('),
824        delimited(
825            ws,
826            ty,
827            ws,
828        ),
829        charx(')')
830    )(input)
831}
832
833// "{" S group S "}"
834#[rustfmt::skip]
835fn type2_map(input: &str) -> JResult<&str, Group> {
836    delimited(
837        charx('{'),
838        delimited(
839            ws,
840            group,
841            ws,
842        ),
843        charx('}')
844    )(input)
845}
846
847// "[" S group S "]"
848#[rustfmt::skip]
849fn type2_array(input: &str) -> JResult<&str, Group> {
850    delimited(
851        charx('['),
852        delimited(
853            ws,
854            group,
855            ws,
856        ),
857        charx(']')
858    )(input)
859}
860
861// "~" S typename [genericarg]
862#[rustfmt::skip]
863fn type2_unwrap(input: &str) -> JResult<&str, NameGeneric> {
864    preceded(
865        tag("~"),
866        preceded(
867            ws,
868            name_generic
869        )
870    )
871    (input)
872}
873
874// "&" S groupname [genericarg]
875// I call the & operator "choice-ify". RFC 8610 (see 2.2.2.2) doesn't say
876// what that operator should be called, and "group choice" already means
877// something different.
878#[rustfmt::skip]
879fn type2_choiceify(input: &str) -> JResult<&str, NameGeneric> {
880    preceded(
881        tag("&"),
882        preceded(
883            ws,
884            name_generic
885        )
886    )
887    (input)
888}
889
890// "&" S "(" S group S ")"
891#[rustfmt::skip]
892fn type2_choiceify_inline(input: &str) -> JResult<&str, Group> {
893    preceded(
894        tag("&"),
895        preceded(
896            ws,
897            delimited(
898                charx('('),
899                delimited(
900                    ws,
901                    group,
902                    ws,
903                ),
904                charx(')')
905            )
906        )
907    )
908    (input)
909}
910
911// type2 = value
912//       / typename [genericarg]
913//       / "(" S type S ")"
914//       / "{" S group S "}"
915//       / "[" S group S "]"
916//       / "~" S typename [genericarg]
917//       / "&" S "(" S group S ")"
918//       / "&" S groupname [genericarg]
919//       / "#" "6" ["." uint] "(" S type S ")"
920//       / "#" DIGIT ["." uint]
921//       / "#"
922#[rustfmt::skip]
923fn type2(input: &str) -> JResult<&str, Type2> {
924    alt((
925        map(value, Type2::Value),
926        map(name_generic, Type2::Typename),
927        map(type2_parens, Type2::Parethesized),
928        map(type2_map, Type2::Map),
929        map(type2_array, Type2::Array),
930        map(type2_unwrap, Type2::Unwrap),
931        map(type2_choiceify_inline, Type2::ChoiceifyInline),
932        map(type2_choiceify, Type2::Choiceify),
933    ))
934    (input)
935}
936
937// Returns the string containing the control identifier
938#[rustfmt::skip]
939fn control_op(input: &str) -> JResult<&str, &str> {
940    preceded(
941        tag("."),
942        ident
943    )
944    (input)
945}
946
947// Returns the range or control operator string
948// (either ".." or "..." or the control identifier)
949#[rustfmt::skip]
950fn range_or_control_op(input: &str) -> JResult<&str, (&str, Type2)> {
951    pair(
952        alt((
953            tag("..."),
954            tag(".."),
955            control_op
956        )),
957        preceded(
958            ws,
959            type2
960        )
961    )
962    (input)
963}
964
965// type1 = type2 [S (rangeop / ctlop) S type2]
966#[rustfmt::skip]
967fn type1(input: &str) -> JResult<&str, Type1> {
968    let f = pair(
969        type2,
970        opt(
971            preceded(
972                ws,
973                range_or_control_op
974            )
975        )
976    );
977    map(f, |ty1| match ty1 {
978        (ty2, None) => Type1::Simple(ty2),
979        (start, Some(("..", end))) => Type1::Range(TypeRange {
980            start,
981            inclusive: true,
982            end,
983        }),
984        (start, Some(("...", end))) => Type1::Range(TypeRange {
985            start,
986            inclusive: false,
987            end,
988        }),
989        (target, Some((op, arg))) => Type1::Control(TypeControl {
990            target,
991            op: op.into(),
992            arg,
993        }),
994    })
995    (input)
996}
997
998// type = type1 [ / type1 ... ]  (skipping over type1 for now)
999#[rustfmt::skip]
1000fn ty(input: &str) -> JResult<&str, Type> {
1001    let f = separated_list1(
1002        delimited(ws, tag("/"), ws),
1003        type1
1004    );
1005    map(f, Type)
1006    (input)
1007}
1008
1009// rule = typename [genericparm] S assignt S type
1010//      / groupname [genericparm] S assigng S grpent
1011// Note that the first one ends with "type", while
1012// the second one ends with "group".
1013// So "foo = (bar)" will be forced down the second path.
1014//
1015// The most efficient parsing would be
1016// 1. name [genericparm] ws
1017// 2. = type
1018//    = grpent
1019//    /= type
1020//    //= grpent
1021//
1022
1023// This is the right side of a rule: one of:
1024//     assignt S type
1025//     assigng S grpent
1026#[rustfmt::skip]
1027fn rule_val(input: &str) -> JResult<&str, RuleVal> {
1028    let f = separated_pair(
1029        tag("="),
1030        ws,
1031        alt((
1032            map(ty, RuleVal::AssignType),
1033            map(grpent, RuleVal::AssignGroup)
1034        ))
1035    );
1036    // We're just throwing away the operator for now, but we'll need it
1037    // later when we implement extend operators /= //=
1038    map(f, |(_op, val)| val )
1039    (input)
1040}
1041
1042// genericparm = "<" S id S *("," S id S ) ">"
1043#[rustfmt::skip]
1044fn generic_parm(input: &str) -> JResult<&str, Vec<&str>> {
1045    delimited(
1046        pair(tag("<"), ws),
1047        separated_list1(
1048            pair(tag(","), ws),
1049            terminated(ident, ws)),
1050        tag(">"),
1051    )(input)
1052}
1053
1054// genericarg = "<" S type1 S *("," S type1 S ) ">"
1055#[rustfmt::skip]
1056fn generic_arg(input: &str) -> JResult<&str, Vec<Type1>> {
1057    delimited(
1058        pair(tag("<"), ws),
1059        separated_list1(
1060            pair(tag(","), ws),
1061            terminated(type1, ws)),
1062        tag(">"),
1063    )(input)
1064}
1065
1066// A type or group name, followed by optional generic arguments.
1067#[rustfmt::skip]
1068fn name_generic(input: &str) -> JResult<&str, NameGeneric> {
1069    let f = pair(ident, opt(generic_arg));
1070    map(f, |(name, generic)| {
1071        // Replace None with empty Vec.
1072        let generic_args = generic.unwrap_or_default();
1073        NameGeneric {
1074            name: name.to_string(),
1075            generic_args,
1076        }
1077    })
1078    (input)
1079}
1080
1081#[rustfmt::skip]
1082fn rule(input: &str) -> JResult<&str, Rule> {
1083    let f = separated_pair(
1084        pair(
1085            ident,
1086            opt(generic_parm)
1087        ),
1088        ws,
1089        rule_val
1090    );
1091    map(f, |((name, gp), val)| Rule {
1092        name: name.into(),
1093        // turn Vec<&str> into Vec<String>
1094        generic_parms: gp.unwrap_or_default().drain(..).map(|s| s.to_string()).collect(),
1095        val,
1096    })(input)
1097}
1098
1099// cddl = S 1*(rule S)
1100#[rustfmt::skip]
1101fn cddl(input: &str) -> JResult<&str, Cddl> {
1102    let f = preceded(ws,
1103        many1(
1104            terminated(rule, ws)
1105        )
1106    );
1107    map(f, |r| Cddl{rules: r})
1108    (input)
1109}
1110
1111#[rustfmt::skip]
1112fn cddl_slice(input: &str) -> JResult<&str, CddlSlice> {
1113    let f = preceded(ws,
1114        many1(
1115            terminated(
1116                map(recognizer(rule), |(s, r)| {
1117                    (r, s.to_string())
1118                }),
1119                ws
1120            )
1121        )
1122    );
1123    map(f, |r| CddlSlice{rules: r})
1124    (input)
1125}
1126
1127/// The main entry point for parsing CDDL text.
1128///
1129/// If successful, it will return a [`Cddl`] instance containing all the rules
1130/// from the input text.
1131///
1132/// # Examples
1133/// ```
1134/// use cddl_cat::parse_cddl;
1135///
1136/// let input = "map = { name: tstr }";
1137/// assert!(parse_cddl(input).is_ok());
1138/// ```
1139///
1140pub fn parse_cddl(input: &str) -> Result<Cddl, ParseError> {
1141    let result = all_consuming(cddl)(input)?;
1142    Ok(result.1)
1143}
1144
1145/// An entry point for parsing CDDL text, preserving rule strings
1146///
1147/// This operates exactly like [`parse_cddl`], but stores a copy of the rule's
1148/// original CDDL text.
1149pub fn slice_parse_cddl(input: &str) -> Result<CddlSlice, ParseError> {
1150    let result = all_consuming(cddl_slice)(input)?;
1151    Ok(result.1)
1152}
1153
1154// Useful utilities for testing the parser.
1155#[cfg(test)]
1156#[macro_use]
1157mod test_utils {
1158    use super::*;
1159
1160    // Generate a Vec<String> the same way we would generate a Vec<&str>.
1161    macro_rules! vec_strings {
1162        ($($str:expr),*) => ({
1163            vec![$(String::from($str),)*] as Vec<String>
1164        });
1165    }
1166
1167    // Given a string, generate a NameGeneric containing a type name.
1168    impl From<&str> for NameGeneric {
1169        fn from(s: &str) -> Self {
1170            NameGeneric {
1171                name: s.to_string(),
1172                generic_args: Vec::new(),
1173            }
1174        }
1175    }
1176
1177    // Given a string, generate a Type2 containing a type name.
1178    impl From<&str> for Type2 {
1179        fn from(s: &str) -> Self {
1180            Type2::Typename(s.into())
1181        }
1182    }
1183
1184    // Given a string, generate a Type1 containing a type name.
1185    impl From<&str> for Type1 {
1186        fn from(s: &str) -> Self {
1187            Type1::Simple(Type2::Typename(s.into()))
1188        }
1189    }
1190
1191    // Given a Value, generate a Type1.
1192    impl From<Value> for Type1 {
1193        fn from(v: Value) -> Self {
1194            Type1::Simple(Type2::Value(v))
1195        }
1196    }
1197
1198    // Given a string, generate a Member containing a no-key value.
1199    impl From<&str> for Member {
1200        fn from(s: &str) -> Self {
1201            Member {
1202                key: None,
1203                value: s.into(),
1204            }
1205        }
1206    }
1207
1208    // Given a Value, generate a Member containing a no-key value.
1209    impl From<Value> for Member {
1210        fn from(v: Value) -> Self {
1211            Member {
1212                key: None,
1213                value: Type1::from(v).into(),
1214            }
1215        }
1216    }
1217
1218    // Given a string, generate a GrpEnt containing that type name.
1219    impl From<&str> for GrpEnt {
1220        fn from(s: &str) -> GrpEnt {
1221            GrpEnt {
1222                occur: None,
1223                val: GrpEntVal::Member(s.into()),
1224            }
1225        }
1226    }
1227
1228    // A trait for generating literals.
1229    pub trait CreateLiteral {
1230        fn literal(self) -> Value;
1231    }
1232
1233    // Create a literal string.
1234    impl CreateLiteral for &str {
1235        fn literal(self) -> Value {
1236            Value::Text(self.to_string())
1237        }
1238    }
1239
1240    // Create a literal integer.
1241    impl CreateLiteral for i64 {
1242        fn literal(self) -> Value {
1243            if self >= 0 {
1244                Value::Uint(self as u64)
1245            } else {
1246                Value::Nint(self)
1247            }
1248        }
1249    }
1250
1251    pub fn bareword(s: &str) -> MemberKeyVal {
1252        MemberKeyVal::Bareword(s.into())
1253    }
1254
1255    impl From<Value> for Type2 {
1256        fn from(x: Value) -> Type2 {
1257            Type2::Value(x)
1258        }
1259    }
1260
1261    // Given a Value (a literal), generate a MemberKeyVal.
1262    impl From<Value> for MemberKeyVal {
1263        fn from(k: Value) -> MemberKeyVal {
1264            MemberKeyVal::Value(k)
1265        }
1266    }
1267
1268    // Given a Type1, generate a MemberKeyVal.
1269    impl From<Type1> for MemberKeyVal {
1270        fn from(t: Type1) -> MemberKeyVal {
1271            MemberKeyVal::Type1(t)
1272        }
1273    }
1274
1275    // Given a string, generate a MemberKeyVal (treating it as a type name).
1276    impl From<&str> for MemberKeyVal {
1277        fn from(k: &str) -> MemberKeyVal {
1278            MemberKeyVal::Type1(k.into())
1279        }
1280    }
1281
1282    #[derive(Copy, Clone)]
1283    pub enum MemberCut {
1284        Cut,
1285        NoCut,
1286    }
1287    pub use MemberCut::*;
1288
1289    impl From<MemberCut> for bool {
1290        fn from(c: MemberCut) -> bool {
1291            match c {
1292                Cut => true,
1293                NoCut => false,
1294            }
1295        }
1296    }
1297
1298    pub fn kv_member<K, V>(k: K, v: V, cut: MemberCut) -> Member
1299    where
1300        K: Into<MemberKeyVal>,
1301        V: Into<Type1>,
1302    {
1303        let v: Type1 = v.into();
1304        Member {
1305            key: Some(MemberKey {
1306                val: k.into(),
1307                cut: cut.into(),
1308            }),
1309            value: v.into(),
1310        }
1311    }
1312
1313    pub fn kv<K, V>(k: K, v: V, cut: MemberCut) -> GrpEnt
1314    where
1315        K: Into<MemberKeyVal>,
1316        V: Into<Type1>,
1317    {
1318        GrpEnt {
1319            occur: None,
1320            val: GrpEntVal::Member(kv_member(k, v, cut)),
1321        }
1322    }
1323
1324    pub fn gen_group<T: Into<GrpEnt>>(mut members: Vec<T>) -> Group {
1325        // convert the members into individual GrpEnt structs
1326        let grpents: Vec<GrpEnt> = members.drain(..).map(|x| x.into()).collect();
1327        // construct a Group containing one GrpChoice.
1328        Group(vec![GrpChoice(grpents)])
1329    }
1330
1331    pub fn gen_array<T: Into<GrpEnt>>(members: Vec<T>) -> Type1 {
1332        Type1::Simple(Type2::Array(gen_group(members)))
1333    }
1334
1335    pub fn gen_map<T: Into<GrpEnt>>(members: Vec<T>) -> Type1 {
1336        Type1::Simple(Type2::Map(gen_group(members)))
1337    }
1338
1339    // Generate a single-Type1 Type struct.
1340    impl From<Type1> for Type {
1341        fn from(x: Type1) -> Self {
1342            Type(vec![x])
1343        }
1344    }
1345
1346    // Generate a single-Type1 Type struct from a plain string (as a type name).
1347    impl From<&str> for Type {
1348        fn from(s: &str) -> Self {
1349            Type(vec![Type1::from(s)])
1350        }
1351    }
1352
1353    // Create a type name with generic arguments
1354    pub fn generic<T: Into<Type1>>(name: &str, mut generic_args: Vec<T>) -> Type1 {
1355        Type1::Simple(Type2::Typename(NameGeneric {
1356            name: name.to_string(),
1357            generic_args: generic_args.drain(..).map(|x| x.into()).collect(),
1358        }))
1359    }
1360}
1361
1362#[cfg(test)]
1363mod tests {
1364    use super::test_utils::*;
1365    use super::*;
1366
1367    #[test]
1368    fn test_whitespace() {
1369        let cddl = "  ; a comment\n        \r\n; another;;;comment\n  ";
1370        let (remainder, _result) = ws(cddl).unwrap();
1371        assert_eq!(remainder, "");
1372    }
1373
1374    #[test]
1375    fn test_ident() {
1376        assert_eq!(ident("a"), Ok(("", "a")));
1377        assert_eq!(ident("a1"), Ok(("", "a1")));
1378        assert_eq!(ident("a.1"), Ok(("", "a.1")));
1379        assert_eq!(ident("a1."), Ok((".", "a1")));
1380        assert_eq!(ident("@a1"), Ok(("", "@a1")));
1381        assert_eq!(ident("a..b"), Ok(("", "a..b")));
1382        assert!(ident("1a").is_err());
1383    }
1384
1385    #[test]
1386    fn test_uint() {
1387        assert_eq!(uint_u64("999"), Ok(("", 999)));
1388        assert_eq!(uint_u64("0"), Ok(("", 0)));
1389        assert_eq!(uint_u64("0x100"), Ok(("", 256)));
1390        assert_eq!(uint_u64("0b101"), Ok(("", 5)));
1391        // We're not supposed to parse leading zeros.
1392        assert_eq!(uint_u64("00"), Ok(("0", 0)));
1393    }
1394
1395    #[test]
1396    fn test_float_or_int() {
1397        assert_eq!(float_or_int("0.0"), Ok(("", Value::Float(0.0))));
1398        assert_eq!(float_or_int("1e99"), Ok(("", Value::Float(1e99))));
1399        assert_eq!(float_or_int("-1e-99"), Ok(("", Value::Float(-1e-99))));
1400        assert_eq!(float_or_int("123"), Ok(("", Value::Uint(123))));
1401        assert_eq!(float_or_int("-123"), Ok(("", Value::Nint(-123))));
1402        assert_eq!(float_or_int("1e"), Ok(("e", Value::Uint(1))));
1403        assert_eq!(float_or_int("1."), Ok((".", Value::Uint(1))));
1404        assert!(float_or_int("abc").is_err());
1405
1406        assert_eq!(float_or_int("0x100"), Ok(("", Value::Uint(256))));
1407        assert_eq!(float_or_int("0b101"), Ok(("", Value::Uint(5))));
1408        // We're not supposed to parse leading zeros.
1409        assert_eq!(float_or_int("00"), Ok(("0", Value::Uint(0))));
1410
1411        assert_eq!(float_or_int("-0x100"), Ok(("", Value::Nint(-256))));
1412        assert_eq!(float_or_int("-0b101"), Ok(("", Value::Nint(-5))));
1413
1414        // While this is allowed in the CDDL grammar, it doesn't make logical sense
1415        // so we want to return an error.
1416        assert!(float_or_int("0b1e99").is_err());
1417        assert!(float_or_int("0b1.1").is_err());
1418    }
1419
1420    #[test]
1421    fn test_bytestring() {
1422        let result1 = bytestring("'abc'");
1423        let result = format!("{:?}", result1);
1424        assert_eq!(result, r#"Ok(("", [97, 98, 99]))"#);
1425
1426        // Same thing, in hex format
1427        assert_eq!(result1, bytestring("h'61 62 63'"));
1428        assert_eq!(result1, bytestring("h' 6 1626 3  '"));
1429
1430        // Same thing, in base64 format
1431        assert_eq!(result1, bytestring("b64'YWJj'"));
1432
1433        // bytestring in UTF-8 with escapes
1434        assert_eq!(bytestring(r#"'a\nb'"#), Ok(("", "a\nb".into())));
1435        assert_eq!(bytestring(r#"'\uD834\uDD1E'"#), Ok(("", "š„ž".into())));
1436
1437        // Non-text bytes
1438        let result2 = vec![0u8, 0xFF, 1, 0x7F];
1439        assert_eq!(Ok(("", result2.clone())), bytestring("h'00FF017f'"));
1440        assert_eq!(Ok(("", result2)), bytestring("b64'AP8Bfw=='"));
1441
1442        // Empty inputs
1443        assert_eq!(Ok(("", vec![])), bytestring("h''"));
1444        assert_eq!(Ok(("", vec![])), bytestring("b64''"));
1445
1446        fn fail_kind(e: nom::Err<CowParseError>) -> ErrorKind {
1447            match e {
1448                nom::Err::Failure(e) => e.kind,
1449                _ => panic!("expected nom::err::Failure, got {:?}", e),
1450            }
1451        }
1452
1453        // Bad hex character
1454        assert_eq!(
1455            fail_kind(bytestring("h'0g1234'").unwrap_err()),
1456            ErrorKind::MalformedHex
1457        );
1458
1459        // Bad base64 character "!"
1460        assert_eq!(
1461            fail_kind(bytestring("b64'AP!Bfw=='").unwrap_err()),
1462            ErrorKind::MalformedBase64
1463        );
1464
1465        // wrong flavor of base64: CDDL requires the "base64url" encoding.
1466        assert_eq!(
1467            // base64 encoding of FBEF00 using the wrong encoder.
1468            fail_kind(bytestring("b64'++8A'").unwrap_err()),
1469            ErrorKind::MalformedBase64
1470        );
1471        assert_eq!(
1472            // base64 encoding of FFFFFF using the wrong encoder.
1473            fail_kind(bytestring("b64'////'").unwrap_err()),
1474            ErrorKind::MalformedBase64
1475        );
1476    }
1477
1478    #[test]
1479    fn test_text() {
1480        assert!(is_unescaped_schar('A'));
1481        assert!(is_unescaped_schar('恮'));
1482        assert!(is_unescaped_schar(std::char::from_u32(0x10FF0).unwrap()));
1483        assert!(!is_unescaped_schar(0x7F as char));
1484
1485        assert_eq!(unescaped_schar("A恮"), Ok(("", "A恮")));
1486
1487        assert_eq!(sesc(r#"\n"#), Ok(("", "n")));
1488        assert_eq!(sesc(r#"\nn"#), Ok(("n", "n")));
1489        assert_eq!(sesc(r#"\恮"#), Ok(("", "恮")));
1490
1491        // FIXME: sesc is allowing characters it shouldn't.
1492        // assert_eq!(sesc("\\\x7F"), Ok(("\\\x7F", "")));
1493
1494        assert_eq!(schar(r#"Ab! \c 恮 \\"#), Ok(("", r#"Ab! \c 恮 \\"#)));
1495        assert_eq!(schar(r#"a\nb"#), Ok(("", r#"a\nb"#)));
1496        assert_eq!(schar("a\nb"), Ok(("\nb", "a")));
1497
1498        assert!(text_literal("\"a\nb").is_err());
1499        assert!(text_literal("abc").is_err());
1500
1501        assert_eq!(text_literal(r#""""#), Ok(("", "".into())));
1502        assert_eq!(text_literal(r#""a\nb""#), Ok(("", "a\nb".into())));
1503        assert_eq!(text_literal(r#""\uD834\uDD1E""#), Ok(("", "š„ž".into())));
1504        assert_eq!(text_literal(r#""恮""#), Ok(("", "恮".into())));
1505    }
1506
1507    #[test]
1508    fn test_value() {
1509        assert_eq!(value("123"), Ok(("", Value::Uint(123))));
1510        assert_eq!(value(r#""abc""#), Ok(("", Value::Text("abc".into()))));
1511        assert!(value("abc").is_err());
1512    }
1513
1514    #[test]
1515    fn test_member() {
1516        let result = grpent_member("a:b");
1517        assert_eq!(
1518            result.unwrap().1,
1519            kv_member(MemberKeyVal::Bareword("a".into()), "b", Cut)
1520        );
1521
1522        let result = grpent_member("foo");
1523        assert_eq!(result.unwrap().1, "foo".into());
1524
1525        let result = grpent_member("a => b");
1526        assert_eq!(result.unwrap().1, kv_member("a", "b", NoCut));
1527
1528        let result = grpent_member("42 ^ => b");
1529        assert_eq!(
1530            result.unwrap().1,
1531            kv_member(Type1::from(42.literal()), "b", Cut)
1532        );
1533
1534        let result = grpent_member("abc<T> => def");
1535        assert_eq!(
1536            result.unwrap().1,
1537            kv_member(generic("abc", vec!["T"]), "def", NoCut)
1538        );
1539
1540        // Generic arguments not allowed with ":"
1541        grpent_member("abc<T> : def").unwrap_err();
1542    }
1543
1544    #[test]
1545    fn test_grpent_parens() {
1546        let result = grpent_parens("()");
1547        assert_eq!(result.unwrap().1, Group(vec![GrpChoice(vec![])]));
1548    }
1549
1550    #[test]
1551    fn test_grpent_val() {
1552        let result = grpent_val("foo");
1553        assert_eq!(result.unwrap().1, GrpEntVal::Member("foo".into()));
1554
1555        let result = grpent_val("17");
1556        assert_eq!(result.unwrap().1, GrpEntVal::Member(17.literal().into()));
1557    }
1558
1559    #[test]
1560    fn test_occur() {
1561        assert_eq!(occur("?"), Ok(("", Occur::Optional)));
1562        assert_eq!(occur("+"), Ok(("", Occur::OneOrMore)));
1563        assert_eq!(occur("*"), Ok(("", Occur::ZeroOrMore)));
1564        assert_eq!(occur("*9"), Ok(("", Occur::Numbered(0, 9))));
1565        assert_eq!(occur("7*"), Ok(("", Occur::Numbered(7, std::usize::MAX))));
1566        assert_eq!(occur("7*9"), Ok(("", Occur::Numbered(7, 9))));
1567        assert_eq!(occur("0b100*0x10"), Ok(("", Occur::Numbered(4, 16))));
1568    }
1569
1570    #[test]
1571    fn test_grpent() {
1572        let result = grpent("foo").unwrap();
1573        assert_eq!(result.1, "foo".into());
1574
1575        let result = grpent("foo: bar").unwrap();
1576        assert_eq!(
1577            result.1,
1578            kv(MemberKeyVal::Bareword("foo".into()), "bar", Cut)
1579        );
1580    }
1581
1582    #[test]
1583    fn test_grpchoice_empty() {
1584        let result = grpchoice("").unwrap();
1585        assert_eq!(result.1, GrpChoice(vec![]));
1586    }
1587
1588    #[test]
1589    fn test_group_empty() {
1590        let result = group("").unwrap();
1591        assert_eq!(result.1, Group(vec![GrpChoice(vec![])]));
1592    }
1593
1594    #[test]
1595    fn test_type1() {
1596        let result = type1("1 .. 9");
1597        assert_eq!(
1598            result.unwrap().1,
1599            Type1::Range(TypeRange {
1600                start: 1.literal().into(),
1601                end: 9.literal().into(),
1602                inclusive: true
1603            })
1604        );
1605
1606        let result = type1("0x10 .. 0x1C");
1607        assert_eq!(
1608            result.unwrap().1,
1609            Type1::Range(TypeRange {
1610                start: 16.literal().into(),
1611                end: 28.literal().into(),
1612                inclusive: true
1613            })
1614        );
1615
1616        let result = type1("1 ... 9");
1617        assert_eq!(
1618            result.unwrap().1,
1619            Type1::Range(TypeRange {
1620                start: 1.literal().into(),
1621                end: 9.literal().into(),
1622                inclusive: false
1623            })
1624        );
1625
1626        let result = type1("uint .size 3");
1627        assert_eq!(
1628            result.unwrap().1,
1629            Type1::Control(TypeControl {
1630                target: "uint".into(),
1631                op: "size".to_string(),
1632                arg: 3.literal().into(),
1633            })
1634        );
1635
1636        // RFC8610 2.2.2.1 points out that "min..max" is not a range, but an identifier
1637        // (because '.' is a valid ident character).
1638        let result = type2("min..max");
1639        assert_eq!(result.unwrap().1, "min..max".into());
1640    }
1641
1642    #[test]
1643    fn test_grpchoice() {
1644        let result = grpchoice("abc").unwrap();
1645        assert_eq!(result.1, GrpChoice(vec!["abc".into()]));
1646
1647        let result = grpchoice("abc, def").unwrap();
1648        assert_eq!(result.1, GrpChoice(vec!["abc".into(), "def".into(),]));
1649    }
1650
1651    #[test]
1652    fn test_generic_parm() {
1653        assert!(generic_parm("").is_err());
1654
1655        assert!(generic_parm("<>").is_err());
1656
1657        let result = generic_parm("<foo>").unwrap();
1658        assert_eq!(result.1, vec!["foo"]);
1659
1660        let result = generic_parm("<foo,bar>").unwrap();
1661        assert_eq!(result.1, vec!["foo", "bar"]);
1662
1663        let result = generic_parm("< foo , _bar_ >").unwrap();
1664        assert_eq!(result.1, vec!["foo", "_bar_"]);
1665    }
1666
1667    #[test]
1668    fn test_generic_arg() {
1669        assert!(generic_arg("").is_err());
1670
1671        assert!(generic_arg("<>").is_err());
1672
1673        let result = generic_arg("<foo>").unwrap();
1674        assert_eq!(result.1, vec!["foo".into()]);
1675
1676        let result = generic_arg("<foo,bar>").unwrap();
1677        assert_eq!(result.1, vec!["foo".into(), "bar".into()]);
1678
1679        let result = generic_arg("< foo , _bar_ >").unwrap();
1680        assert_eq!(result.1, vec!["foo".into(), "_bar_".into()]);
1681    }
1682
1683    #[test]
1684    fn choiceify() {
1685        assert_eq!(
1686            type2("&foo").unwrap().1,
1687            Type2::Choiceify(NameGeneric {
1688                name: "foo".into(),
1689                generic_args: vec![],
1690            })
1691        );
1692        assert_eq!(
1693            type2("&(a:1)").unwrap().1,
1694            Type2::ChoiceifyInline(gen_group(vec![kv(bareword("a"), 1.literal(), Cut),]))
1695        );
1696    }
1697
1698    #[test]
1699    fn test_rule() {
1700        let result = rule("foo=bar").unwrap().1;
1701
1702        assert_eq!(
1703            result,
1704            Rule {
1705                name: "foo".into(),
1706                generic_parms: vec![],
1707                val: RuleVal::AssignType("bar".into())
1708            }
1709        );
1710
1711        let result = rule("foo=(bar, baz)").unwrap().1;
1712        assert_eq!(
1713            result,
1714            Rule {
1715                name: "foo".into(),
1716                generic_parms: vec![],
1717                val: RuleVal::AssignGroup(GrpEnt {
1718                    occur: None,
1719                    val: GrpEntVal::Parenthesized(gen_group(vec!["bar", "baz"])),
1720                })
1721            }
1722        );
1723
1724        let result = rule("message<t, v> = [t, v]").unwrap().1;
1725        assert_eq!(
1726            result,
1727            Rule {
1728                name: "message".into(),
1729                generic_parms: vec_strings!["t", "v"],
1730                val: RuleVal::AssignType(gen_array(vec!["t", "v"]).into())
1731            }
1732        );
1733    }
1734
1735    #[test]
1736    fn test_cddl() {
1737        let result = parse_cddl("foo = {\"a\": bar,\n b => baz}");
1738
1739        assert_eq!(
1740            result.unwrap(),
1741            Cddl {
1742                rules: vec![Rule {
1743                    name: "foo".into(),
1744                    generic_parms: vec![],
1745                    val: RuleVal::AssignType(Type(vec![gen_map(vec![
1746                        kv("a".literal(), "bar", Cut),
1747                        kv("b", "baz", NoCut)
1748                    ])]))
1749                }]
1750            }
1751        );
1752    }
1753
1754    #[test]
1755    fn test_cddl_slice() {
1756        let result = slice_parse_cddl(" foo = { a: tstr } bar = \n[ int ] ").unwrap();
1757        assert_eq!(result.rules[0].1, "foo = { a: tstr }");
1758        assert_eq!(result.rules[1].1, "bar = \n[ int ]");
1759    }
1760
1761    // FIXME: these are things I discovered while validating cbor.  Move them to their own tests?
1762    #[test]
1763    fn test_stuff() {
1764        parse_cddl("thing = { foo : tstr }").unwrap();
1765        parse_cddl("bar = (c: int)").unwrap(); // This is a rule containing a group assignment.
1766        parse_cddl("thing = {agroup empty} agroup = (age: int, name: tstr) empty = ()").unwrap();
1767        parse_cddl(
1768            r#"
1769            address = { delivery }
1770
1771            delivery = (
1772            street: tstr, ? "number": uint, city //
1773            po_box: uint, city //
1774            per_pickup: true )
1775
1776            city = (
1777            name: tstr, zip_code: uint
1778            )"#,
1779        )
1780        .unwrap();
1781    }
1782
1783    #[test]
1784    fn test_errors() {
1785        let err = parse_cddl("x=9999999999999999999999999999999").unwrap_err();
1786        assert_eq!(err.kind, ErrorKind::MalformedInteger);
1787
1788        let err = parse_cddl(r#"x="\ud800""#).unwrap_err();
1789        assert_eq!(err.kind, ErrorKind::MalformedText);
1790
1791        let err = parse_cddl("x=h'61 62 6'").unwrap_err();
1792        assert_eq!(err.kind, ErrorKind::MalformedHex);
1793    }
1794}