ion_rs/text/parsers/
containers.rs

1use crate::raw_symbol_token::RawSymbolToken;
2use crate::text::parse_result::{IonParseResult, UpgradeIResult, UpgradeParser};
3use nom::branch::alt;
4use nom::bytes::streaming::tag;
5use nom::combinator::{map, peek, value};
6use nom::sequence::{delimited, pair, preceded, terminated};
7use nom::{IResult, Parser};
8
9use crate::text::parsers::annotations::parse_annotations;
10use crate::text::parsers::comments::whitespace_or_comments;
11use crate::text::parsers::string::parse_string;
12use crate::text::parsers::symbol::{parse_operator, parse_symbol};
13use crate::text::parsers::top_level::top_level_value;
14use crate::text::parsers::value::{annotated_container_start, annotated_scalar};
15use crate::text::text_value::{AnnotatedTextValue, TextValue};
16
17/// Matches the beginning of a container and returns a [TextValue] indicating its type.
18pub(crate) fn container_start(input: &str) -> IonParseResult<TextValue> {
19    alt((struct_start, list_start, s_expression_start))(input).upgrade()
20}
21
22/// Matches the beginning of a struct and returns a [TextValue::StructStart].
23pub(crate) fn struct_start(input: &str) -> IResult<&str, TextValue> {
24    value(TextValue::StructStart, tag("{"))(input)
25}
26
27/// Matches the beginning of a list and returns a [TextValue::ListStart].
28pub(crate) fn list_start(input: &str) -> IResult<&str, TextValue> {
29    value(TextValue::ListStart, tag("["))(input)
30}
31
32/// Matches the beginning of an s-expression and returns a [TextValue::SExpStart].
33pub(crate) fn s_expression_start(input: &str) -> IResult<&str, TextValue> {
34    value(TextValue::SExpStart, tag("("))(input)
35}
36
37/// Matches the end of a struct and returns a `&str` containing the delimiter.
38pub(crate) fn struct_end(input: &str) -> IonParseResult<&str> {
39    preceded(whitespace_or_comments, tag("}").upgrade())(input)
40}
41
42/// Matches the end of a list and returns a `&str` containing the delimiter.
43pub(crate) fn list_end(input: &str) -> IonParseResult<&str> {
44    preceded(whitespace_or_comments, tag("]").upgrade())(input)
45}
46
47/// Matches the end of an s-expression and returns a `&str` containing the delimiter.
48pub(crate) fn s_expression_end(input: &str) -> IonParseResult<&str> {
49    preceded(whitespace_or_comments, tag(")").upgrade())(input)
50}
51
52/// Matches an optional series of annotations and a TextValue. If the TextValue is not a container,
53/// this parser will also match a trailing delimiting comma (that will be consumed) or end-of-list
54/// marker (that will not be consumed). Whitespace and comments can appear throughout; they will be
55/// discarded.
56pub(crate) fn list_value(input: &str) -> IonParseResult<AnnotatedTextValue> {
57    alt((
58        // Matches a scalar value and either a delimiter or end-of-container.
59        list_scalar,
60        // If the next value in the list is a container, we only need to match the start.
61        // We'll look for the trailing delimiter or end-of-container when the reader steps out.
62        preceded(whitespace_or_comments, annotated_container_start),
63    ))(input)
64}
65
66/// Matches a (possibly annotated) non-container value in a list followed by a delimiter
67/// or end-of-container.
68pub(crate) fn list_scalar(input: &str) -> IonParseResult<AnnotatedTextValue> {
69    // A list scalar must be followed by either a comma or the end-of-list delimiter (`]`).
70    delimited(
71        // Any amount of whitespace or comments
72        whitespace_or_comments,
73        // Match the value itself (may be preceded by whitespace/comments)
74        annotated_scalar,
75        // Check for any amount of whitespace followed by a comma or end-of-list delimiter.
76        list_delimiter,
77    )(input)
78}
79
80/// Matches any amount of whitespace/comments followed by either a delimiter (which is consumed)
81/// or an end-of-container (which is not consumed).
82pub(crate) fn list_delimiter(input: &str) -> IonParseResult<()> {
83    preceded(
84        whitespace_or_comments,
85        alt((tag(",").upgrade(), peek(list_end))),
86    )
87    // TODO: This parser discards the matched &str as a workaround to a limitation in RawTextReader.
88    //       See: https://github.com/amazon-ion/ion-rust/issues/337
89    .map(|_| ())
90    .parse(input)
91}
92
93/// Returns [None] if the next token in input is an end-of-list delimiter (`]`).
94/// Otherwise, matches and returns the next value in the list using [list_value].
95pub(crate) fn list_value_or_end(input: &str) -> IonParseResult<Option<AnnotatedTextValue>> {
96    map(list_end, |_end_marker| None)
97        .or(map(list_value, Some))
98        .parse(input)
99}
100
101/// Matches an optional series of annotations and a TextValue (including operators). If the TextValue
102/// is not a container, this parser will also match a trailing delimiting whitespace character
103/// (that will be consumed) or end-of-s-expression marker (that will not be consumed).
104pub(crate) fn s_expression_value(input: &str) -> IonParseResult<AnnotatedTextValue> {
105    alt((
106        // Matches a scalar value followed by either a delimiter or end-of-container.
107        s_expression_scalar,
108        // If the next value in the s-expression is a container, we only need to match the start.
109        // We'll look for the trailing delimiter or end-of-container when the reader steps out.
110        preceded(whitespace_or_comments, annotated_container_start),
111    ))(input)
112}
113
114/// Matches a (possibly annotated) non-container value in an s-expression followed by a delimiter
115/// or end-of-container.
116pub(crate) fn s_expression_scalar(input: &str) -> IonParseResult<AnnotatedTextValue> {
117    preceded(
118        whitespace_or_comments,
119        // An s-expression value can be either...
120        alt((
121            // ...an annotated operator (`foo::++`)...
122            pair(parse_annotations, parse_operator)
123                .map(|(annotations, value)| AnnotatedTextValue::new(annotations, value)),
124            // ...a non-operator value, with or without annotations (`5`, `foo::5`, `"hello"`, etc)...
125            top_level_value,
126            // ...or an un-annotated operator (`++`).
127            parse_operator.map(|op| op.without_annotations()),
128        )),
129        // ^^^ Note that the parser order above is important.
130        //
131        // We need the s-expression parser to recognize the input `--3` as the operator `--` and the
132        // int `3` while recognizing the input `-3` as the int `-3`. If `parse_operator` runs before
133        // `top_level_value`, it will consume the sign (`-`) of negative number values, treating
134        // `-3` as an operator (`-`) and an int (`3`).
135        //
136        // Similarly, we must check for annotated operators before we check for annotated values.
137        // If we run `top_level_value` first, it will consume the annotation from the
138        // input `foo::++` as a symbol (`foo`), leaving the `::++` in the buffer.
139    )(input)
140}
141
142/// Returns [None] if the next token in input is an end-of-s-expression delimiter (`)`).
143/// Otherwise, matches and returns the next value in the s-expression using
144/// [`s_expression_value`].
145pub(crate) fn s_expression_value_or_end(input: &str) -> IonParseResult<Option<AnnotatedTextValue>> {
146    map(s_expression_end, |_end_marker| None)
147        .or(map(s_expression_value, Some))
148        .parse(input)
149}
150
151/// Always matches. Consumes nothing from input. This function is only defined for parity with the
152/// other container types.
153pub(crate) fn s_expression_delimiter(input: &str) -> IonParseResult<()> {
154    // An s-expression doesn't require *anything* to appear between values. For example:
155    //    (+(foo)-)
156    // This s-expression contains three child values:
157    // 1. an operator: `+`
158    // 2. a nested s-expression: `(foo)`
159    // 3. another operator (`-`)
160    //
161    // Notice that no delimiters appear between these values.
162    Ok((input, ()))
163}
164
165/// Matches a struct field name and returns it as a [RawSymbolToken].
166/// This function should be called before [`struct_field_value`].
167pub(crate) fn struct_field_name(input: &str) -> IonParseResult<RawSymbolToken> {
168    delimited(
169        whitespace_or_comments,
170        // We check for string first because the field name may be a long string (`'''foo'''`)
171        // and we don't want the symbol parser to interpret the first two `''`s as an empty symbol.
172        parse_string.or(parse_symbol),
173        pair(whitespace_or_comments, tag(":")),
174    )
175    .map(|value| match value {
176        TextValue::String(text) => RawSymbolToken::Text(text),
177        TextValue::Symbol(token) => token,
178        other => unreachable!(
179            "Struct field names can only be strings or symbols. Found a {:?}",
180            other
181        ),
182    })
183    .parse(input)
184}
185
186/// Matches an optional series of annotations and a TextValue. If the TextValue is not a container,
187/// this parser will also match a trailing delimiting comma (that will be consumed) or end-of-struct
188/// marker (that will not be consumed). Whitespace and comments can appear throughout; they will be
189/// discarded.
190pub(crate) fn struct_field_value(input: &str) -> IonParseResult<AnnotatedTextValue> {
191    alt((
192        // Matches a scalar value and either a delimiter or end-of-container.
193        struct_field_scalar,
194        // If the next value in the list is a container, we only need to match the start.
195        // We'll look for the trailing delimiter or end-of-container when the reader steps out.
196        preceded(whitespace_or_comments, annotated_container_start),
197    ))(input)
198}
199
200/// Matches a (possibly annotated) non-container value in an struct followed by a delimiter
201/// or end-of-container.
202pub(crate) fn struct_field_scalar(input: &str) -> IonParseResult<AnnotatedTextValue> {
203    terminated(
204        // Match the value itself (may be preceded by whitespace/comments)
205        top_level_value,
206        // Check for any amount of whitespace followed by a comma or end-of-struct delimiter.
207        struct_delimiter,
208    )(input)
209}
210
211/// Returns [None] if the next token in input is an end-of-struct delimiter (`}`).
212/// Otherwise, matches and returns the next field name in the struct using [struct_field_name].
213pub(crate) fn struct_field_name_or_end(input: &str) -> IonParseResult<Option<RawSymbolToken>> {
214    map(struct_end, |_end_marker| None)
215        .or(map(struct_field_name, Some))
216        .parse(input)
217}
218
219/// Matches any amount of whitespace/comments followed by either a delimiter (which is consumed)
220/// or an end-of-container (which is not consumed).
221pub(crate) fn struct_delimiter(input: &str) -> IonParseResult<()> {
222    preceded(whitespace_or_comments, alt((tag(","), peek(struct_end))))
223        // TODO: This parser discards the matched &str as a workaround to a limitation in RawTextReader.
224        //       See: https://github.com/amazon-ion/ion-rust/issues/337
225        .map(|_| ())
226        .parse(input)
227}
228
#[cfg(test)]
mod container_parsing_tests {
    use rstest::*;

    use crate::raw_symbol_token::{local_sid_token, text_token};
    use crate::text::parsers::unit_test_support::{parse_test_err, parse_test_ok};
    use crate::text::text_value::TextValue;
    use crate::types::{Decimal, Int};

    use super::*;

    /// Builds the [TextValue] for an `i64` integer.
    fn int(value: i64) -> TextValue {
        TextValue::Int(Int::I64(value))
    }

    /// Builds the [TextValue] for a symbol with the given text.
    fn symbol(text: &'static str) -> TextValue {
        TextValue::Symbol(text_token(text))
    }

    /// Builds the [TextValue] for the decimal `7.`.
    fn decimal_seven() -> TextValue {
        TextValue::Decimal(Decimal::new(7, 0))
    }

    /// Placeholder "expected" value for cases that are marked `#[should_panic]`.
    fn never_matched() -> AnnotatedTextValue {
        TextValue::String(String::from("<should panic>")).without_annotations()
    }

    // Each container's opening delimiter maps to its own start value.
    #[rstest]
    #[case::start_of_struct("{", TextValue::StructStart)]
    #[case::start_of_list("[", TextValue::ListStart)]
    #[case::start_of_s_expression("(", TextValue::SExpStart)]
    fn test_parse_container_start_ok(#[case] source: &str, #[case] expected: TextValue) {
        parse_test_ok(container_start, source, expected)
    }

    // Scalars, annotated containers, and unrelated punctuation are not container starts.
    #[rstest]
    #[case("5")]
    #[case("true")]
    #[case("foo")]
    #[case("foo::{")]
    #[case("\"hello\"")]
    #[case("<")]
    fn test_parse_container_start_err(#[case] source: &str) {
        parse_test_err(container_start, source)
    }

    #[rstest]
    #[case("5,", int(5).without_annotations())]
    #[case("foo::bar::5,", int(5).with_annotations(["foo", "bar"]))]
    #[case("foo::bar,", symbol("bar").with_annotations("foo"))]
    #[case("bar]", symbol("bar").without_annotations())]
    #[case("7.]", decimal_seven().without_annotations())]
    #[should_panic]
    // The value is not followed by a `,` or a `]`.
    #[case("5 ", never_matched())]
    #[should_panic]
    // There is no value at all, only a comma.
    #[case(", ", never_matched())]
    fn test_parse_list_values(#[case] source: &str, #[case] expected: AnnotatedTextValue) {
        parse_test_ok(list_value, source, expected);
    }

    #[rstest]
    #[case("'++',", Some(symbol("++").without_annotations()))]
    #[case("foo::'++',", Some(symbol("++").with_annotations("foo")))]
    #[case("5    ,", Some(int(5).without_annotations()))]
    #[case("5]", Some(int(5).without_annotations()))]
    #[case("]", None)]
    #[case("  ]", None)]
    #[case(" /*comment*/  ]", None)]
    fn test_parse_list_value_or_end(
        #[case] source: &str,
        #[case] expected: Option<AnnotatedTextValue>,
    ) {
        parse_test_ok(list_value_or_end, source, expected);
    }

    #[rstest]
    #[case("++ ", symbol("++").without_annotations())]
    #[case("foo::++ ", symbol("++").with_annotations("foo"))]
    #[case("5 ", int(5).without_annotations())]
    #[case("5)", int(5).without_annotations())]
    #[case("foo::bar::5 ", int(5).with_annotations(["foo", "bar"]))]
    // The trailing `0` lets the parser tell that the preceding value is complete.
    #[case("foo::bar 0", symbol("bar").with_annotations("foo"))]
    #[case("bar)", symbol("bar").without_annotations())]
    #[case("7.)", decimal_seven().without_annotations())]
    #[should_panic]
    // The value is followed by a comma instead of whitespace.
    #[case("5, ", never_matched())]
    #[should_panic]
    // The value is followed by the wrong closing delimiter.
    #[case("5]", never_matched())]
    fn test_parse_s_expression_values(#[case] source: &str, #[case] expected: AnnotatedTextValue) {
        parse_test_ok(s_expression_value, source, expected);
    }

    #[rstest]
    #[case("++ ", Some(symbol("++").without_annotations()))]
    #[case("foo::++ ", Some(symbol("++").with_annotations("foo")))]
    #[case("5 ", Some(int(5).without_annotations()))]
    #[case(")", None)]
    #[case("  )", None)]
    #[case(" /*comment*/  )", None)]
    fn test_parse_s_expression_value_or_end(
        #[case] source: &str,
        #[case] expected: Option<AnnotatedTextValue>,
    ) {
        parse_test_ok(s_expression_value_or_end, source, expected);
    }

    #[rstest]
    #[case("5,", int(5).without_annotations())]
    #[case("5  ,", int(5).without_annotations())]
    #[case("foo::bar::5,", int(5).with_annotations(["foo", "bar"]))]
    #[case("foo::bar,", symbol("bar").with_annotations("foo"))]
    #[case("bar}", symbol("bar").without_annotations())]
    #[case("7.}", decimal_seven().without_annotations())]
    #[should_panic]
    // The value is not followed by a `,` or a `}`.
    #[case("5 ", never_matched())]
    #[should_panic]
    // There is no value at all, only a comma.
    #[case(", ", never_matched())]
    fn test_parse_struct_field_values(#[case] source: &str, #[case] expected: AnnotatedTextValue) {
        parse_test_ok(struct_field_value, source, expected);
    }

    // Field names may be identifiers, quoted symbols, symbol IDs, or strings, with optional
    // whitespace and comments around the name and before the `:`.
    #[rstest]
    #[case("foo:", text_token("foo"))]
    #[case("  foo  :", text_token("foo"))]
    #[case(
        "/* Here's a field name */  foo // And here's a delimiter\n:",
        text_token("foo")
    )]
    #[case("'foo':", text_token("foo"))]
    #[case("  'foo'  :", text_token("foo"))]
    #[case("$10:", local_sid_token(10))]
    #[case("  $10  :", local_sid_token(10))]
    #[case("\"foo\":", text_token("foo"))]
    #[case("  \"foo\"  :", text_token("foo"))]
    fn test_parse_struct_field_name(#[case] source: &str, #[case] expected: RawSymbolToken) {
        parse_test_ok(struct_field_name, source, expected);
    }

    #[rstest]
    #[case("foo:", Some(text_token("foo")))]
    #[case("  foo  :", Some(text_token("foo")))]
    #[case("'foo':", Some(text_token("foo")))]
    #[case("}", None)]
    #[case("   }", None)]
    #[case("/*comment*/}", None)]
    fn test_parse_struct_field_name_or_end(
        #[case] source: &str,
        #[case] expected: Option<RawSymbolToken>,
    ) {
        parse_test_ok(struct_field_name_or_end, source, expected);
    }
}