tnipv_preamble/
lib.rs

/*
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */
//! This crate comprises the preamble parsing logic for `tnipv`, the TNIP
//! validator.
//!
//! See [`Preamble`] for more details.
10#![warn(missing_docs)]
11
12use annotate_snippets::snippet::{Annotation, AnnotationType, Slice, Snippet};
13
14use regex::Regex;
15
16use snafu::{ensure, Backtrace, OptionExt, Snafu};
17
18use std::collections::HashMap;
19
20/// Errors that can arise while parsing a preamble. See [`Preamble::parse'].
21#[derive(Debug, Snafu)]
22pub struct ParseErrors<'a> {
23    backtrace: Backtrace,
24    errors: Vec<Snippet<'a>>,
25}
26
27impl<'a> ParseErrors<'a> {
28    /// Consumes the error and returns the diagnostic messages (annotations)
29    /// that caused it.
30    pub fn into_errors(self) -> Vec<Snippet<'a>> {
31        self.errors
32    }
33}
34
/// Errors that can arise from [`Preamble::split`].
///
/// Each variant names one way in which the two `---` delimiter lines that
/// frame a preamble can be missing or misplaced.
// NOTE(review): no variant carries an explicit `#[snafu(display(...))]`, so
// snafu presumably uses each variant's doc comment as its `Display` text —
// confirm before rewording the variant docs below.
//
// `#[snafu(module)]` places the generated context selectors in a
// `split_error` module, and `context(suffix(false))` keeps each selector's
// name equal to its variant name (e.g. `split_error::MissingStart`).
#[derive(Debug, Snafu)]
#[snafu(module)]
pub enum SplitError {
    /// Bytes appeared before the first delimiter.
    #[snafu(context(suffix(false)))]
    LeadingGarbage,
    /// The first delimiter was not found.
    #[snafu(context(suffix(false)))]
    MissingStart,
    /// The second delimiter was not found.
    #[snafu(context(suffix(false)))]
    MissingEnd,
}
49
50#[derive(Debug, Clone, Default)]
51struct Fields<'a> {
52    vec: Vec<Field<'a>>,
53    map: HashMap<&'a str, usize>,
54}
55
56impl<'a> Fields<'a> {
57    fn push(&mut self, field: Field<'a>) {
58        let idx = self.vec.len();
59        self.map.insert(field.name, idx);
60        self.vec.push(field);
61    }
62
63    fn iter(&self) -> impl '_ + Iterator<Item = Field<'a>> {
64        // Use the `Vec` to iterate, so lints can detect duplicates.
65        self.vec.iter().copied()
66    }
67
68    fn by_name(&self, name: &str) -> Option<Field<'a>> {
69        self.map.get(name).map(|idx| self.vec[*idx])
70    }
71
72    fn by_index(&self, index: usize) -> Option<Field<'a>> {
73        self.vec.get(index).copied()
74    }
75}
76
77/// An ordered list of fields from a preamble.
78#[derive(Debug, Default, Clone)]
79pub struct Preamble<'a> {
80    fields: Fields<'a>,
81}
82
83impl<'a> Preamble<'a> {
84    /// Divides the given text into a preamble portion and a body portion.
85    pub fn split(text: &'a str) -> Result<(&'a str, &'a str), SplitError> {
86        let re_marker = Regex::new(r"(^|\n)---(\n|$)").unwrap();
87
88        let mut iter = re_marker.find_iter(text);
89
90        let start = iter.next().context(split_error::MissingStart)?;
91        let end = iter.next().context(split_error::MissingEnd)?;
92
93        ensure!(start.start() == 0, split_error::LeadingGarbage);
94
95        let preamble = &text[start.end()..end.start()];
96        let body = &text[end.end()..];
97
98        Ok((preamble, body))
99    }
100
101    /// Parse some preamble text (usually extracted with [`Preamble::split`])
102    /// for easy access.
103    pub fn parse(origin: Option<&'a str>, text: &'a str) -> Result<Self, ParseErrors<'a>> {
104        let lines = text.split('\n');
105        let mut result: Result<Fields<'a>, Vec<Snippet<'a>>> = Ok(Default::default());
106
107        for (index, line) in lines.enumerate() {
108            let line_start = index + 1 + 1; // Lines start at one, plus `---\n`.
109
110            result = match (result, Self::parse_line(origin, line_start, line)) {
111                // Correct so far, and parsed a good name/value pair.
112                (Ok(mut fields), Ok(new_field)) => {
113                    fields.push(new_field);
114                    Ok(fields)
115                }
116
117                // Had errors, and failed to parse a name/value pair.
118                (Err(mut errors), Err(new_error)) => {
119                    errors.push(new_error);
120                    Err(errors)
121                }
122
123                // Was correct, but failed to parse the next name/value pair.
124                (Ok(_), Err(new_error)) => Err(vec![new_error]),
125
126                // Had errors, but successfully parsed a name/value pair.
127                (r @ Err(_), Ok(_)) => r,
128            };
129        }
130
131        match result {
132            Ok(fields) => Ok(Self { fields }),
133            Err(errors) => ParseErrorsSnafu { errors }.fail(),
134        }
135    }
136
137    #[allow(clippy::result_large_err)]
138    fn parse_line(
139        origin: Option<&'a str>,
140        line_start: usize,
141        line: &'a str,
142    ) -> Result<Field<'a>, Snippet<'a>> {
143        let mut parts = line.splitn(2, ':');
144        let name = parts.next().unwrap();
145        let value = match parts.next() {
146            Some(v) => v,
147            None => {
148                return Err(Snippet {
149                    title: Some(Annotation {
150                        label: Some("missing delimiter `:` in preamble field"),
151                        id: None,
152                        annotation_type: AnnotationType::Error,
153                    }),
154                    slices: vec![Slice {
155                        source: line,
156                        line_start,
157                        origin,
158                        annotations: vec![],
159                        fold: false,
160                    }],
161                    ..Default::default()
162                });
163            }
164        };
165
166        Ok(Field {
167            line_start,
168            name,
169            value,
170            source: line,
171        })
172    }
173
174    /// Provides an iterator over the fields from the preamble, in the order
175    /// they appeared in the source text.
176    pub fn fields(&self) -> impl '_ + Iterator<Item = Field<'a>> {
177        self.fields.iter()
178    }
179
180    /// Get a field by its name, or `None` if it isn't present.
181    pub fn by_name(&self, name: &str) -> Option<Field<'a>> {
182        self.fields.by_name(name)
183    }
184
185    /// Get a field by its position in the source file (zero-indexed.)
186    pub fn by_index(&self, index: usize) -> Option<Field<'a>> {
187        self.fields.by_index(index)
188    }
189}
190
/// A field from a [`Preamble`] that includes its position in a source file.
///
/// A field is a cheap, copyable view: every string it exposes borrows from
/// the text the preamble was parsed from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Field<'a> {
    line_start: usize,
    source: &'a str,
    name: &'a str,
    value: &'a str,
}

impl<'a> Field<'a> {
    /// Line the field was defined on.
    ///
    /// This is the number assigned when the field was parsed; see
    /// [`Preamble::parse`] for how lines are counted.
    pub fn line_start(&self) -> usize {
        self.line_start
    }

    /// Key (before the colon) of this preamble field, exactly as written.
    pub fn name(&self) -> &'a str {
        self.name
    }

    /// Value (after the colon) of this preamble field.
    ///
    /// The text is not trimmed, so it usually starts with the space that
    /// follows the colon.
    pub fn value(&self) -> &'a str {
        self.value
    }

    /// Complete text of the line this field was parsed from (name, colon and
    /// value), suitable for quoting in diagnostics.
    pub fn source(&self) -> &'a str {
        self.source
    }
}
221
#[cfg(test)]
mod tests {
    use annotate_snippets::display_list::DisplayList;
    use assert_matches::assert_matches;

    use super::*;

    // ---- `Preamble::split` -------------------------------------------------

    /// Text without any `---` line has no opening delimiter.
    #[test]
    fn split_missing_start() {
        let text = "hello world\n";
        assert_matches!(
            Preamble::split(text),
            Err(SplitError::MissingStart { .. })
        );
    }

    /// A single `---` line opens the preamble but never closes it.
    #[test]
    fn split_missing_end() {
        let text = "---\nfoo: bar\n";
        assert_matches!(Preamble::split(text), Err(SplitError::MissingEnd { .. }));
    }

    /// Both delimiters exist, but text precedes the first one.
    #[test]
    fn split_leading_garbage() {
        let text = "hello world\n---\nfoo: bar\n---\n";
        assert_matches!(
            Preamble::split(text),
            Err(SplitError::LeadingGarbage { .. })
        );
    }

    /// Body without a trailing line feed.
    #[test]
    fn split_line_feed() {
        let text = "---\nfoo: bar\n---\n\nhello world";
        let parts = Preamble::split(text).unwrap();

        assert_eq!(parts, ("foo: bar", "\nhello world"));
    }

    /// `\r\n` line endings are not recognised as delimiter terminators.
    #[test]
    fn split_carriage_return_then_line_feed() {
        let text = "---\r\nfoo: bar\r\n---\r\n\r\nhello world";
        assert_matches!(
            Preamble::split(text),
            Err(SplitError::MissingStart { .. })
        );
    }

    /// Bare `\r` line endings are not recognised either.
    #[test]
    fn split_carriage_return() {
        let text = "---\rfoo: bar\r---\r\rhello world";
        assert_matches!(
            Preamble::split(text),
            Err(SplitError::MissingStart { .. })
        );
    }

    /// The closing delimiter may end the text without a line feed.
    #[test]
    fn split_no_trailing_newline() {
        let text = "---\nfoo: bar\n---";
        let parts = Preamble::split(text).unwrap();

        assert_eq!(parts, ("foo: bar", ""));
    }

    /// The common case: preamble, blank line, body, trailing line feed.
    #[test]
    fn split() {
        let text = "---\nfoo: bar\n---\n\nhello world\n";
        let parts = Preamble::split(text).unwrap();

        assert_eq!(parts, ("foo: bar", "\nhello world\n"));
    }

    // ---- `Preamble::parse` -------------------------------------------------

    /// A line without `:` produces exactly one rendered diagnostic, numbered
    /// as line 3 (the opening delimiter counts as line 1).
    #[test]
    fn parse_missing_colon() {
        let text = "foo: bar\nbanana split";
        let failure = Preamble::parse(None, text).unwrap_err();
        assert_eq!(failure.errors.len(), 1);

        let mut snippets = failure.into_errors();
        let rendered = DisplayList::from(snippets.pop().unwrap()).to_string();
        let expected = r#"error: missing delimiter `:` in preamble field
  |
3 | banana split
  |"#;
        assert_eq!(rendered, expected);
    }

    /// The input ends with a line feed, so splitting on `\n` leaves an empty
    /// final line; that empty line has no `:`, which is what fails here.
    #[test]
    fn parse_missing_value() {
        let text = "foo:\n";
        assert_matches!(Preamble::parse(None, text), Err(_));
    }

    /// Two well-formed lines become two fields, with untrimmed values and
    /// line numbers starting at 2.
    #[test]
    fn parse() {
        let text = "foo: bar\nbanana: split";
        let preamble = Preamble::parse(None, text).unwrap();
        let parsed: Vec<_> = preamble.fields().collect();

        assert_matches!(
            parsed.as_slice(),
            [
                Field {
                    line_start: 2,
                    name: "foo",
                    value: " bar",
                    source: "foo: bar",
                },
                Field {
                    line_start: 3,
                    name: "banana",
                    value: " split",
                    source: "banana: split",
                },
            ]
        );
    }
}