codeowners_rs/
parser.rs

1use std::{fs::File, io::Read, path::Path};
2
3use crate::ruleset::{self, Owner};
4
5/// Parse a CODEOWNERS file from a string, returning a `ParseResult` containing
6/// the parsed rules and any errors encountered.
7pub fn parse(source: &str) -> ParseResult {
8    Parser::new(source).parse()
9}
10
11/// Parse a CODEOWNERS file from a file path, reading the contents of the file
12/// and returning a `ParseResult` containing the parsed rules and any errors
13/// encountered.
14pub fn parse_file(path: &Path) -> std::io::Result<ParseResult> {
15    let mut file = File::open(path)?;
16    let mut source = String::new();
17    file.read_to_string(&mut source)?;
18    Ok(parse(&source))
19}
20
21/// The result of parsing a CODEOWNERS file. Contains a `Vec` of parsed rules
22/// and a `Vec` of errors encountered during parsing. If the `Vec` of errors is
23/// non-empty, the `Vec` of rules may be incomplete. If the `Vec` of errors is
24/// empty, the file was parsed successfully.
25#[derive(Debug, Clone, PartialEq, Eq)]
26pub struct ParseResult {
27    pub rules: Vec<Rule>,
28    pub errors: Vec<ParseError>,
29}
30
31impl ParseResult {
32    /// Convert the `ParseResult` into a `RuleSet`. If the `ParseResult` contains
33    /// any errors, they are ignored.
34    pub fn into_ruleset(self: ParseResult) -> ruleset::RuleSet {
35        ruleset::RuleSet::new(self.rules.into_iter().map(|r| r.into()).collect())
36    }
37}
38
39/// A parsed CODEOWNERS rule. Contains a pattern and a list of owners, along
40/// with any comments that were found before or after the rule. All fields are
41/// wrapped in `Spanned` to preserve the original source location.
42///
43/// For most uses, the `Rule` type should be converted into a `ruleset::Rule`
44/// using the `From` trait or the `into_ruleset` method on `ParseResult`. This
45/// will remove the `Spanned` wrappers and discard any comments.
46#[derive(Debug, Clone, PartialEq, Eq)]
47pub struct Rule {
48    pub pattern: Spanned<String>,
49    pub owners: Vec<Spanned<Owner>>,
50    pub leading_comments: Vec<Spanned<String>>,
51    pub trailing_comment: Option<Spanned<String>>,
52}
53
54impl Rule {
55    fn new(pattern: Spanned<String>, owners: Vec<Spanned<Owner>>) -> Rule {
56        Rule {
57            pattern,
58            owners,
59            leading_comments: Vec::new(),
60            trailing_comment: None,
61        }
62    }
63}
64
65impl From<Rule> for ruleset::Rule {
66    fn from(rule: Rule) -> Self {
67        ruleset::Rule {
68            pattern: rule.pattern.0,
69            owners: rule.owners.into_iter().map(|o| o.0).collect(),
70        }
71    }
72}
73
74/// An error encountered while parsing a CODEOWNERS file. Contains a message
75/// describing the error and a `Span` indicating the location of the error.
76#[derive(Debug, Clone, PartialEq, Eq)]
77pub struct ParseError {
78    pub message: String,
79    pub span: Span,
80}
81
82impl ParseError {
83    fn new(message: impl Into<String>, span: impl Into<Span>) -> ParseError {
84        ParseError {
85            message: message.into(),
86            span: span.into(),
87        }
88    }
89}
90
/// A span of text in a CODEOWNERS file.
///
/// Field `0` is the start byte offset and field `1` is the end byte offset of
/// the span.
///
/// The type is a plain pair of offsets, so it is `Copy` and `Hash`: spans can
/// be passed around by value and used as set or map keys without cloning.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Span(pub usize, pub usize);
95
96impl From<(usize, usize)> for Span {
97    fn from((start, end): (usize, usize)) -> Self {
98        Span(start, end)
99    }
100}
101
102/// A wrapper around a value that preserves the original source location of the
103/// value. Contains the value and a `Span` indicating the location of the value.
104#[derive(Debug, Clone, PartialEq, Eq)]
105pub struct Spanned<T>(pub T, pub Span);
106
107impl<T> Spanned<T> {
108    fn new(val: impl Into<T>, span: impl Into<Span>) -> Spanned<T> {
109        Spanned(val.into(), span.into())
110    }
111}
112
113struct Parser<'a> {
114    source: &'a str,
115    pos: usize,
116    errors: Vec<ParseError>,
117}
118
119impl<'a> Parser<'a> {
120    fn new(source: &'a str) -> Self {
121        Self {
122            source,
123            pos: 0,
124            errors: Vec::new(),
125        }
126    }
127
128    fn parse(mut self) -> ParseResult {
129        let mut rules = Vec::new();
130        let mut leading_comments = Vec::new();
131
132        // Recoverable errors are added to self.errors during parsing,
133        // unrecoverable errors are passed via results
134        self.skip_whitespace();
135        while let Some(c) = self.peek() {
136            match c {
137                '\r' | '\n' => {
138                    self.next();
139                }
140                '#' => {
141                    let comment = self.parse_comment();
142                    leading_comments.push(comment);
143                }
144                _ => {
145                    match self.parse_rule() {
146                        Ok(mut rule) => {
147                            rule.leading_comments = leading_comments;
148                            rules.push(rule)
149                        }
150                        Err(e) => {
151                            self.errors.push(e);
152                            break;
153                        }
154                    }
155                    leading_comments = Vec::new();
156                }
157            }
158            self.skip_whitespace();
159        }
160
161        ParseResult {
162            rules,
163            errors: self.errors,
164        }
165    }
166
167    fn parse_comment(&mut self) -> Spanned<String> {
168        let start = self.pos;
169        let mut comment = String::new();
170        loop {
171            match self.peek() {
172                Some('\r' | '\n') => break,
173                Some(c) => {
174                    self.next();
175                    comment.push(c);
176                }
177                None => break,
178            }
179        }
180        Spanned::new(comment, (start, self.pos))
181    }
182
183    fn parse_rule(&mut self) -> Result<Rule, ParseError> {
184        let pattern = self.parse_pattern();
185        if pattern.0.is_empty() {
186            return Err(ParseError::new("expected pattern", (self.pos, self.pos)));
187        }
188
189        let mut owners = Vec::new();
190        loop {
191            self.skip_whitespace();
192            let Some(owner) = self.parse_owner() else {
193                break;
194            };
195            owners.push(owner);
196        }
197
198        // Find pattern terminator (newline, EOF, or #)
199        match self.peek() {
200            Some('\r' | '\n') | None => Ok(Rule::new(pattern, owners)),
201            Some('#') => {
202                let trailing_comment = Some(self.parse_comment());
203                Ok(Rule {
204                    pattern,
205                    owners,
206                    leading_comments: vec![],
207                    trailing_comment,
208                })
209            }
210            _ => Err(ParseError::new("expected newline", (self.pos, self.pos))),
211        }
212    }
213
214    fn parse_pattern(&mut self) -> Spanned<String> {
215        let start = self.pos;
216        let mut pattern = String::new();
217        let mut escaped = false;
218        loop {
219            match self.peek() {
220                Some('\\') if !escaped => {
221                    escaped = true;
222                    self.next();
223                }
224                Some(' ' | '\t' | '#' | '\r' | '\n') if !escaped => break,
225                Some(c) => {
226                    if c == '\0' {
227                        self.errors.push(ParseError::new(
228                            "patterns cannot contain null bytes",
229                            (self.pos, self.pos + 1),
230                        ));
231                    }
232                    pattern.push(c);
233                    self.next();
234                    escaped = false;
235                }
236                None => break,
237            }
238        }
239        Spanned::new(pattern, (start, self.pos))
240    }
241
242    fn parse_owner(&mut self) -> Option<Spanned<Owner>> {
243        let start = self.pos;
244        let mut owner_str = String::new();
245        loop {
246            match self.peek() {
247                Some(' ' | '\t' | '#' | '\r' | '\n') => break,
248                Some(c) => {
249                    owner_str.push(c);
250                    self.next();
251                }
252                None => break,
253            }
254        }
255
256        if owner_str.is_empty() {
257            return None;
258        }
259
260        match Owner::try_from(owner_str) {
261            Ok(owner) => Some(Spanned::new(owner, (start, self.pos))),
262            Err(err) => {
263                self.errors.push(ParseError {
264                    message: err.to_string(),
265                    span: (start, self.pos).into(),
266                });
267                None
268            }
269        }
270    }
271
272    fn skip_whitespace(&mut self) {
273        while let Some(' ' | '\t') = self.peek() {
274            self.next();
275        }
276    }
277
278    fn peek(&self) -> Option<char> {
279        self.source[self.pos..].chars().next()
280    }
281
282    fn next(&mut self) -> Option<char> {
283        let c = self.peek()?;
284        self.pos += c.len_utf8();
285        Some(c)
286    }
287}
288
#[cfg(test)]
mod tests {
    use super::ruleset::OwnerKind;
    use super::*;

    /// Expected rule for a line that has no comments attached.
    fn bare_rule(pattern: &str, span: (usize, usize), owners: Vec<Spanned<Owner>>) -> Rule {
        Rule::new(Spanned::new(pattern, span), owners)
    }

    /// Expected owner entry together with its source span.
    fn owner(text: &str, kind: OwnerKind, span: (usize, usize)) -> Spanned<Owner> {
        Spanned::new(Owner::new(text.to_string(), kind), span)
    }

    #[test]
    fn test_parser() {
        // Each case is (source, expected rules, expected errors). All spans
        // are byte offsets into the source string.
        let cases: Vec<(&str, Vec<Rule>, Vec<ParseError>)> = vec![
            // A lone pattern without owners.
            ("foo", vec![bare_rule("foo", (0, 3), vec![])], vec![]),
            // An escaped space belongs to the pattern; the unescaped one ends it.
            ("foo\\  ", vec![bare_rule("foo ", (0, 5), vec![])], vec![]),
            // Whitespace around the pattern is skipped.
            (" foo ", vec![bare_rule("foo", (1, 4), vec![])], vec![]),
            // LF and CRLF line endings plus a whitespace-only line.
            (
                "foo\nbar\r\n \nbaz",
                vec![
                    bare_rule("foo", (0, 3), vec![]),
                    bare_rule("bar", (4, 7), vec![]),
                    bare_rule("baz", (11, 14), vec![]),
                ],
                vec![],
            ),
            // A null byte is kept in the pattern but reported.
            (
                "f\0oo",
                vec![bare_rule("f\0oo", (0, 4), vec![])],
                vec![ParseError::new(
                    "patterns cannot contain null bytes",
                    (1, 2),
                )],
            ),
            // An invalid owner is reported and left out of the rule.
            (
                "foo bar",
                vec![bare_rule("foo", (0, 3), vec![])],
                vec![ParseError::new("invalid owner: bar", (4, 7))],
            ),
            // A `#` directly after the pattern starts a trailing comment.
            (
                "foo#abc",
                vec![Rule {
                    trailing_comment: Some(Spanned::new("#abc", (3, 7))),
                    ..bare_rule("foo", (0, 3), vec![])
                }],
                vec![],
            ),
            // A single user owner.
            (
                "foo @bar",
                vec![bare_rule(
                    "foo",
                    (0, 3),
                    vec![owner("@bar", OwnerKind::User, (4, 8))],
                )],
                vec![],
            ),
            // Team and email owners on the same line.
            (
                "a/b @c/d e@f.co",
                vec![bare_rule(
                    "a/b",
                    (0, 3),
                    vec![
                        owner("@c/d", OwnerKind::Team, (4, 8)),
                        owner("e@f.co", OwnerKind::Email, (9, 15)),
                    ],
                )],
                vec![],
            ),
            // A trailing comment directly after an owner, surrounded by blank lines.
            (
                "\n foo @bar# baz \n",
                vec![Rule {
                    trailing_comment: Some(Spanned::new("# baz ", (10, 16))),
                    ..bare_rule(
                        "foo",
                        (2, 5),
                        vec![owner("@bar", OwnerKind::User, (6, 10))],
                    )
                }],
                vec![],
            ),
            // Leading comments attach to the next rule, even across a blank line.
            (
                "# a\nfoo # b\n# c\n# d\n\nbar\n",
                vec![
                    Rule {
                        leading_comments: vec![Spanned::new("# a", (0, 3))],
                        trailing_comment: Some(Spanned::new("# b", (8, 11))),
                        ..bare_rule("foo", (4, 7), vec![])
                    },
                    Rule {
                        leading_comments: vec![
                            Spanned::new("# c", (12, 15)),
                            Spanned::new("# d", (16, 19)),
                        ],
                        ..bare_rule("bar", (21, 24), vec![])
                    },
                ],
                vec![],
            ),
        ];

        for (source, rules, errors) in cases {
            let actual = Parser::new(source).parse();
            let expected = ParseResult { rules, errors };
            assert_eq!(actual, expected, "result mismatch for `{}`", source);
        }
    }
}