r3bl_tui/tui/md_parser/extended/
parse_metadata_kcsv.rs

1/*
2 *   Copyright (c) 2023 R3BL LLC
3 *   All rights reserved.
4 *
5 *   Licensed under the Apache License, Version 2.0 (the "License");
6 *   you may not use this file except in compliance with the License.
7 *   You may obtain a copy of the License at
8 *
9 *   http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *   Unless required by applicable law or agreed to in writing, software
12 *   distributed under the License is distributed on an "AS IS" BASIS,
13 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *   See the License for the specific language governing permissions and
15 *   limitations under the License.
16 */
17
18use nom::{bytes::complete::tag,
19          combinator::opt,
20          sequence::{preceded, tuple},
21          IResult};
22
23use crate::{constants::{COLON, COMMA, NEW_LINE, SPACE},
24            list,
25            take_text_until_new_line_or_end,
26            List};
27
28/// - Sample parse input: `@tags: tag1, tag2, tag3`, `@tags: tag1, tag2, tag3\n`,
29///   or `@authors: me, myself, i`, `@authors: me, myself, i\n`.
30/// - There may or may not be a newline at the end. If there is, it is consumed.
31pub fn parse_csv_opt_eol<'a>(
32    tag_name: &'a str,
33    input: &'a str,
34) -> IResult<&'a str, List<&'a str>> {
35    let (remainder, tags_text) = preceded(
36        /* start */ tuple((tag(tag_name), tag(COLON), tag(SPACE))),
37        /* output */ take_text_until_new_line_or_end(),
38    )(input)?;
39
40    // If there is a newline, consume it since there may or may not be a newline at
41    // the end.
42    let (remainder, _) = opt(tag(NEW_LINE))(remainder)?;
43
44    // Special case: Early return when just a `@tags: ` or `@tags: \n` is found.
45    if tags_text.is_empty() {
46        Ok((remainder, list![]))
47    }
48    // Normal case.
49    else {
50        // At this point, `output` can have something like: `tag1, tag2, tag3`.
51        let (_, vec_tags_text) = parse_comma_separated_list(tags_text)?;
52        Ok((remainder, List::from(vec_tags_text)))
53    }
54}
55
56/// | input                | rem     |  output                           |
57/// | -------------------- | ------- | --------------------------------- |
58/// | `"tag1, tag2, tag3"` | `""`    | `vec!(["tag1", "tag2", "tag3"])`  |
59fn parse_comma_separated_list(input: &str) -> IResult<&str, Vec<&str>> {
60    let acc: Vec<&str> = input.split(COMMA).collect();
61    let mut trimmed_acc: Vec<&str> = Vec::with_capacity(acc.len());
62
63    // Verify whitespace prefix rules.
64    match acc.len() {
65        0 => {
66            // Empty. Nothing to do here.
67        }
68        1 => {
69            // Only one item. Must not be prefixed with a space.
70            let only_item = &acc[0];
71            if only_item.starts_with(SPACE) {
72                return Err(nom::Err::Error(nom::error::Error::new(
73                    "Only item must not start with space.",
74                    nom::error::ErrorKind::Fail,
75                )));
76            } else {
77                trimmed_acc.push(only_item);
78            }
79        }
80        _ => {
81            // More than one item:
82            // 1. 1st item must not be prefixed with a space.
83            // 2. 2nd item onwards must be prefixed by at least 1 space, may have more.
84            let mut my_iter = acc.iter();
85
86            let first_item = my_iter.next().unwrap();
87
88            // First item must not be prefixed with a space.
89            if first_item.starts_with(SPACE) {
90                return Err(nom::Err::Error(nom::error::Error::new(
91                    "First item must not start with space.",
92                    nom::error::ErrorKind::Fail,
93                )));
94            } else {
95                trimmed_acc.push(first_item);
96            }
97
98            // Rest of items must be prefixed with a space.
99            for rest_item in my_iter {
100                if !rest_item.starts_with(SPACE) {
101                    return Err(nom::Err::Error(nom::error::Error::new(
102                        "Non-first item must start with space.",
103                        nom::error::ErrorKind::Fail,
104                    )));
105                }
106                // Can only trim 1 space from start of rest_item.
107                trimmed_acc.push(&rest_item[1..]);
108            }
109        }
110    }
111
112    Ok((input, trimmed_acc))
113}
114
#[cfg(test)]
mod test_parse_tags_opt_eol {
    use r3bl_core::assert_eq2;

    use super::*;
    use crate::constants::TAGS;

    /// A well formed line without a trailing new line is consumed completely.
    #[test]
    fn test_not_quoted_no_eol() {
        let (remainder, tags) =
            parse_csv_opt_eol(TAGS, "@tags: tag1, tag2, tag3").unwrap();
        assert_eq2!(remainder, "");
        assert_eq2!(tags, list!["tag1", "tag2", "tag3"]);
    }

    /// Whitespace rules are enforced when there is no trailing new line.
    #[test]
    fn test_not_quoted_no_eol_err_whitespace() {
        let rejected_lines = [
            // First fragment mustn't have any space prefix.
            "@tags:  tag1, tag2, tag3",
            // 2nd fragment onwards must have a space prefix.
            "@tags: tag1,tag2, tag3",
            "@tags: tag1,  tag2,tag3",
            "@tags: tag1, tag2,tag3",
        ];
        for &line in &rejected_lines {
            assert_eq2!(parse_csv_opt_eol(TAGS, line).is_err(), true);
        }

        // It is ok to have more than 1 prefix space for 2nd fragment onwards; only
        // the first one is dropped.
        let parsed = parse_csv_opt_eol(TAGS, "@tags: tag1, tag2,  tag3").unwrap();
        assert_eq2!(parsed, ("", list!["tag1", "tag2", " tag3"]));
    }

    /// Lines with a trailing new line, with extra content after it, and without
    /// any new line are all accepted.
    #[test]
    fn test_not_quoted_with_eol() {
        // Valid, and the new line is consumed.
        let (remainder, tags) =
            parse_csv_opt_eol(TAGS, "@tags: tag1, tag2, tag3\n").unwrap();
        assert_eq2!(remainder, "");
        assert_eq2!(tags, list!["tag1", "tag2", "tag3"]);

        // Content after the new line, or no new line at all, must not cause an
        // error.
        let accepted_lines = ["@tags: tag1, tag2, tag3\n]\n", "@tags: tag1, tag2, tag3"];
        for &line in &accepted_lines {
            assert_eq2!(parse_csv_opt_eol(TAGS, line).is_err(), false);
        }
    }

    /// Whitespace rules are enforced when there is a trailing new line.
    #[test]
    fn test_not_quoted_with_eol_whitespace() {
        let rejected_lines = [
            // First fragment mustn't have any space prefix.
            "@tags:  tag1, tag2, tag3\n",
            // 2nd fragment onwards must have a space prefix.
            "@tags: tag1,tag2, tag3\n",
            "@tags: tag1,  tag2,tag3\n",
            "@tags: tag1, tag2,tag3\n",
        ];
        for &line in &rejected_lines {
            assert_eq2!(parse_csv_opt_eol(TAGS, line).is_err(), true);
        }

        // It is ok to have more than 1 prefix space for 2nd fragment onwards; only
        // the first one is dropped.
        let parsed = parse_csv_opt_eol(TAGS, "@tags: tag1, tag2,  tag3\n").unwrap();
        assert_eq2!(parsed, ("", list!["tag1", "tag2", " tag3"]));
    }

    /// An empty tag list followed by more content leaves that content unparsed.
    #[test]
    fn test_not_quoted_with_postfix_content() {
        let (remainder, tags) = parse_csv_opt_eol(TAGS, "@tags: \nfoo\nbar").unwrap();
        assert_eq2!(remainder, "foo\nbar");
        assert_eq2!(tags, list![]);
    }
}