sectxtlib/
parsers.rs

1use crate::{ParseError, SecurityTxtOptions};
2
3use super::raw_field::RawField;
4
5use nom::{
6    branch::alt,
7    bytes::complete::{take_while, take_while1},
8    character::complete::{char, crlf, satisfy},
9    combinator::{all_consuming, map, opt, recognize},
10    multi::{many0_count, many1},
11    sequence::{preceded, terminated, tuple},
12    IResult,
13};
14
/// Parser for the line-oriented grammar of an unsigned security.txt body.
///
/// Constructed from a `SecurityTxtOptions` value via `new`; `parse` turns the
/// input text into one `Option<RawField>` per line.
pub(crate) struct SecurityTxtParser {
    // Clone of the options passed to `new`. The leading underscore marks it as
    // currently unused: nothing in the visible impl reads it.
    _options: SecurityTxtOptions,
}
18
19impl SecurityTxtParser {
20    pub fn new(options: &SecurityTxtOptions) -> Self {
21        Self {
22            _options: options.clone(),
23        }
24    }
25
26    pub fn parse<'a>(&'a self, text: &'a str) -> Result<Vec<Option<RawField<'a>>>, ParseError> {
27        let (_, msg) = self.body_parser(text)?;
28        Ok(msg)
29    }
30
31    // body             =  signed / unsigned
32    // signed is handled separately.
33    fn body_parser<'a>(&'a self, i: &'a str) -> IResult<&'a str, Vec<Option<RawField<'a>>>> {
34        all_consuming(|x| self.unsigned_parser(x))(i)
35    }
36
37    // unsigned       =  *line (contact-field eol) ; one or more required
38    //                   *line (expires-field eol) ; exactly one required
39    //                   *line [lang-field eol] *line ; exactly one optional
40    //                   ; order of fields within the file is not important
41    //                   ; except that if contact-field appears more
42    //                   ; than once, the order of those indicates
43    //                   ; priority (see Section 3.5.3)
44    fn unsigned_parser<'a>(&'a self, i: &'a str) -> IResult<&'a str, Vec<Option<RawField<'a>>>> {
45        many1(|x| self.line_parser(x))(i)
46    }
47
48    // line             =  [ (field / comment) ] eol
49    fn line_parser<'a>(&'a self, i: &'a str) -> IResult<&'a str, Option<RawField<'a>>> {
50        let field_parser_opt = map(|x| self.field_parser(x), Some);
51        let comment_parser_opt = map(|x| self.comment_parser(x), |_| None);
52
53        let (i, raw_field) = terminated(opt(alt((comment_parser_opt, field_parser_opt))), |x| self.eol_parser(x))(i)?;
54        let flattened = raw_field.flatten();
55        Ok((i, flattened))
56    }
57
58    // eol              =  *WSP [CR] LF
59    fn eol_parser<'a>(&'a self, i: &'a str) -> IResult<&'a str, &'a str> {
60        recognize(tuple((take_while(is_wsp), opt(|x| self.cr_parser(x)), |x| {
61            self.lf_parser(x)
62        })))(i)
63    }
64
65    // field            =  ; optional fields
66    //                     ack-field /
67    //                     can-field /
68    //                     contact-field / ; optional repeated instances
69    //                     encryption-field /
70    //                     hiring-field /
71    //                     policy-field /
72    //                     ext-field
73    fn field_parser<'a>(&'a self, i: &'a str) -> IResult<&'a str, RawField<'a>> {
74        self.ext_name_parser(i)
75    }
76
77    // fs               =  ":"
78    fn fs_parser<'a>(&'a self, i: &'a str) -> IResult<&'a str, char> {
79        char(':')(i)
80    }
81
82    // comment          =  "#" *(WSP / VCHAR / %x80-FFFFF)
83    fn comment_parser<'a>(&'a self, i: &'a str) -> IResult<&'a str, &'a str> {
84        let matcher = |x| is_wsp(x) || is_vchar(x) || x >= '\u{80}';
85        preceded(char('#'), take_while(matcher))(i)
86    }
87
88    // ack-field        =  "Acknowledgments" fs SP uri
89    // can-field        =  "Canonical" fs SP uri
90    // contact-field    =  "Contact" fs SP uri
91    // expires-field    =  "Expires" fs SP date-time
92    // encryption-field =  "Encryption" fs SP uri
93    // hiring-field     =  "Hiring" fs SP uri
94    // lang-field       =  "Preferred-Languages" fs SP lang-values
95    // policy-field     =  "Policy" fs SP uri
96    // date-time        =  < imported from Section 5.6 of [RFC3339] >
97    // lang-tag         =  < Language-Tag from Section 2.1 of [RFC5646] >
98    // lang-values      =  lang-tag *(*WSP "," *WSP lang-tag)
99    // uri              =  < URI as per Section 3 of [RFC3986] >
100
101    // ext-field        =  field-name fs SP unstructured
102    fn ext_name_parser<'a>(&'a self, i: &'a str) -> IResult<&'a str, RawField<'a>> {
103        let (i, (name, _, _, value)) = tuple((
104            |x| self.field_name_parser(x),
105            |x| self.fs_parser(x),
106            |x| self.sp_parser(x),
107            |x| self.unstructured_parser(x),
108        ))(i)?;
109        Ok((i, RawField { name, value }))
110    }
111
112    // field-name       =  < imported from Section 3.6.8 of [RFC5322] >
113    // field-name       =  1*ftext
114    fn field_name_parser<'a>(&'a self, i: &'a str) -> IResult<&'a str, &'a str> {
115        take_while1(is_ftext_char)(i)
116    }
117
118    // < imported from [RFC5322] >
119    // unstructured     =   *([FWS] VCHAR) *WSP
120    // Ommitted obsolete part.
121    fn unstructured_parser<'a>(&'a self, i: &'a str) -> IResult<&'a str, &'a str> {
122        recognize(terminated(
123            recognize(many0_count(preceded(opt(|x| self.fws_parser(x)), satisfy(is_vchar)))),
124            take_while(is_wsp),
125        ))(i)
126    }
127
128    // < imported from [RFC5322] >
129    // FWS              =   [*WSP CRLF] 1*WSP
130    // Ommitted obsolete part.
131    fn fws_parser<'a>(&'a self, i: &'a str) -> IResult<&'a str, &'a str> {
132        recognize(preceded(
133            opt(tuple((take_while(is_wsp), |x| self.crlf_parser(x)))),
134            take_while1(is_wsp),
135        ))(i)
136    }
137
138    fn cr_parser<'a>(&'a self, i: &'a str) -> IResult<&'a str, char> {
139        satisfy(is_cr)(i)
140    }
141
142    // CRLF             =  CR LF
143    //                       ; Internet standard newline
144    fn crlf_parser<'a>(&'a self, i: &'a str) -> IResult<&'a str, &'a str> {
145        crlf(i)
146    }
147
148    fn lf_parser<'a>(&'a self, i: &'a str) -> IResult<&'a str, char> {
149        satisfy(is_lf)(i)
150    }
151
152    // SP               =  %x20
153    fn sp_parser<'a>(&'a self, i: &'a str) -> IResult<&'a str, char> {
154        char(' ')(i)
155    }
156}
157
158// field-name       =  < imported from Section 3.6.8 of [RFC5322] >
159// ftext            =  %d33-57 /          ; Printable US-ASCII
160//                     %d59-126           ;  characters not including
161//                                        ;  ":".
/// Returns `true` for printable US-ASCII characters (33..=126) other than `:`.
fn is_ftext_char(i: char) -> bool {
    // %d33-57 and %d59-126: the gap at 58 is the colon.
    matches!(i, '\x21'..='\x39' | '\x3B'..='\x7E')
}
169
170// CR               =  %x0D
171//                       ; carriage return
/// Returns `true` only for the carriage-return character (U+000D).
fn is_cr(i: char) -> bool {
    matches!(i, '\r')
}
175
176// LF               =  %x0A
177//                       ; linefeed
/// Returns `true` only for the line-feed character (U+000A).
fn is_lf(i: char) -> bool {
    matches!(i, '\n')
}
181
182// VCHAR            =  %x21-7E
183//                       ; visible (printing) characters
/// Returns `true` for visible (printing) ASCII characters, U+0021 through U+007E.
fn is_vchar(i: char) -> bool {
    // `is_ascii_graphic` covers exactly '!' ..= '~'.
    i.is_ascii_graphic()
}
187
188// WSP              =  SP / HTAB
189//                       ; white space
/// Returns `true` for a space or a horizontal tab.
fn is_wsp(i: char) -> bool {
    matches!(i, ' ' | '\t')
}
193
#[cfg(test)]
mod tests {
    use super::*;
    use std::{fs, path::PathBuf};

    /// Builds the path `resources/test/<category>` under the crate root.
    fn get_tests_dir(category: &str) -> PathBuf {
        PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(format!("resources/test/{category}"))
    }

    /// Parses every file in the given test category and asserts that each one
    /// is accepted by the parser.
    fn run_tests_from_dir(dir: &str) {
        let parser = SecurityTxtParser::new(&Default::default());

        for entry in get_tests_dir(dir).read_dir().unwrap() {
            let file = entry.unwrap().path();
            println!("Input file: {:?}", file);
            let contents = fs::read_to_string(file).unwrap();
            assert!(parser.parse(&contents).is_ok());
        }
    }

    #[test]
    fn test_category_gen_unsigned() {
        run_tests_from_dir("gen_unsigned")
    }

    #[test]
    fn test_line_parser() {
        let parser = SecurityTxtParser::new(&Default::default());
        let foo_bar = RawField {
            name: "foo",
            value: "bar",
        };
        // Each case is a full line plus the field it should produce, if any.
        let cases = [
            ("\n", None),
            ("\t \r\n", None),
            ("# This is a comment.\n", None),
            ("foo: bar\r\n", Some(foo_bar)),
        ];

        for (input, expected) in cases {
            assert_eq!(parser.line_parser(input), Ok(("", expected)));
        }
    }
}