conventional_commits_parser/
parser.rs

1use conventional_commits_types::{
2    Commit, Footer, FooterSeparator, SEPARATOR_COLON, SEPARATOR_HASHTAG,
3};
4use nom::{
5    branch::alt,
6    bytes::complete::{tag, take, take_while1},
7    character::complete::{line_ending, not_line_ending},
8    combinator::{map, map_res, opt, peek},
9    error::{context, ParseError, VerboseError},
10    multi::many0,
11    sequence::{preceded, terminated, tuple},
12    IResult,
13};
14use nom_unicode::complete::alpha1;
15use std::str::FromStr;
16
17pub use conventional_commits_types;
18
/// Footer token that marks a breaking change, written with a space.
///
/// This is the only footer token that is allowed to contain whitespace.
pub const BREAKING_CHANGE_TOKEN: &str = "BREAKING CHANGE";

/// Footer token that marks a breaking change, written with a hyphen.
pub const BREAKING_CHANGE_WITH_HYPHEN_TOKEN: &str = "BREAKING-CHANGE";
24
25/// Parses the commit type.
26///
27/// A commit type is a consecutive sequence of unicode characters without any
28/// whitespace in between.
29///
30/// # Specification
31///
32/// 1) Commits MUST be prefixed with a type, which consists of a noun, feat,
33/// fix, etc., [...].
34///
35/// 2) The type `feat` MUST be used when a commit adds a new feature to your
36/// application or library.
37///
38/// 3) The type `fix` MUST be used when a commit represents a bug fix for your
39/// application.
40fn r#type<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str, E> {
41    alpha1(i)
42}
43
44/// Parses the commit scope.
45///
46/// A commit scope is an optional component. If present, it is surrounded by
47/// parenthesis.
48///
49/// # Specification
50///
51/// 4) A scope MAY be provided after a type. A scope MUST consist of a noun
52/// describing a section of the codebase surrounded by parenthesis, e.g.,
53/// `fix(parser):`.
54///
55/// # Implementation
56///
57/// The current implementation does only allow for consecutive unicode
58/// characters without any whitespace in between.
59fn scope<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str, E> {
60    preceded(tag("("), terminated(alpha1, tag(")")))(i)
61}
62
63// A simple colon parser.
64fn colon<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str, E> {
65    tag(":")(i)
66}
67
68// A simple exclamation mark parser.
69fn exclamation_mark<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str, E> {
70    tag("!")(i)
71}
72
73// Parses the `: ` separator.
74fn colon_separator<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str, E> {
75    let (rest, _) = colon(i)?;
76    tag(" ")(rest)
77}
78
79/// Parses the commit description.
80///
81/// A commit description can be made out of any valid unicode character except
82/// for a newline.
83///
84/// # Specification
85///
86/// 5) A description MUST immediately follow the colon and space after the
87/// type/scope prefix. The description is a short summary of the code changes,
88/// e.g., `fix: array parsing issue when multiple spaces were contained in
89/// string`.
90fn description<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str, E> {
91    not_line_ending(i)
92}
93
94/// Parses the commit body.
95///
96/// A commit body is an optional component. It consists of every valid unicode
97/// character and whitespace. It terminates with a double newline.
98///
99/// # Specification
100///
101/// 6) A longer commit body MAY be provided after the short description,
102/// providing additional contextual information about the code changes. The body
103/// MUST begin one blank line after the description.
104///
105/// 7) A commit body is free-form and MAY consist of any number of newline
106/// separated paragraphs.
107// TODO: make function return Option<&str> and do not rely on empty strings
108// being empty bodies.
109fn body<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, Option<&str>, E> {
110    // If the next token is actually a footer, the body is empty.
111    if peek::<_, _, E, _>(footer_identifier)(i).is_ok() {
112        return Ok((i, None));
113    }
114
115    let mut found_newline = false;
116    let mut offset_to_split_off = 0usize;
117
118    for line in i.lines() {
119        // Check if the line is just a newline. Since we iterate over each line, the
120        // content of the line will be empty in those cases.
121        if line.is_empty() {
122            found_newline = true;
123        } else if peek::<_, _, E, _>(footer_identifier)(line).is_ok() && found_newline {
124            // We break if we find a valid footer identifier proceeded by a newline.
125            break;
126        } else {
127            // Reset trigger condition to make sure that we skip paragraphs that are not
128            // followed by a footer identifier.
129            found_newline = false;
130        }
131
132        // +1 needed to accommodate for the missing newline that sits between each of
133        // the enumerated lines.
134        offset_to_split_off += line.chars().count() + 1;
135    }
136
137    // Depending on whether a new line has been found and therefore a following
138    // footer, the offset has to be shortened by either 1 or 2 chars.
139    let to_subtract = if found_newline { 2 } else { 1 };
140
141    let (rest, b) = map(take(offset_to_split_off - to_subtract), str::trim)(i)?;
142    Ok((rest, Some(b)))
143}
144
145/// Checks if a given input is a breaking change token.
146///
147/// # Returns
148///
149/// `true` if the input matches either
150/// [BREAKING_CHANGE](consts.BREAKING_CHANGE_TOKEN.html)
151/// or [BREAKING_CHANGE_WITH_HYPHEN_TOKEN](consts.
152/// BREAKING_CHANGE_WITH_HYPHEN_TOKEN.html).
153fn is_breaking_change_token(i: &str) -> bool {
154    i == BREAKING_CHANGE_TOKEN || i == BREAKING_CHANGE_WITH_HYPHEN_TOKEN
155}
156
157fn breaking_change_footer_token<'a, E: ParseError<&'a str>>(
158    i: &'a str,
159) -> IResult<&'a str, &'a str, E> {
160    alt((
161        tag(BREAKING_CHANGE_TOKEN),
162        tag(BREAKING_CHANGE_WITH_HYPHEN_TOKEN),
163    ))(i)
164}
165
/// Tells whether `c` may appear inside a (non breaking change) footer token.
///
/// A footer token is built from alphabetic unicode characters and hyphens
/// only; digits, whitespace and other punctuation are rejected.
fn is_footer_token_char(c: char) -> bool {
    match c {
        '-' => true,
        other => other.is_alphabetic(),
    }
}
172
173/// Parses all footer tokens except the breaking changes one.
174fn footer_token_other<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str, E> {
175    // FIXME: use take_while1 with bool function if nom_unicode#3 gets resolved.
176    take_while1(is_footer_token_char)(i)
177}
178
179/// Parses the footer token.
180///
181/// # Specification
182///
183/// 9. A footer’s token MUST use `-` in place of whitespace characters, e.g.,
184/// `Acked-by` (this helps differentiate the footer section from a
185/// multi-paragraph body). An exception is made for `BREAKING CHANGE`, which MAY
186/// also be used as a token.
187fn footer_token<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str, E> {
188    alt((breaking_change_footer_token, footer_token_other))(i)
189}
190
191/// Parses the footer separator.
192///
193/// The footer separator separates the footer's token from its value.
194fn footer_separator<'a, E: ParseError<&'a str>>(
195    i: &'a str,
196) -> IResult<&'a str, FooterSeparator, E> {
197    map_res(alt((tag(SEPARATOR_COLON), tag(SEPARATOR_HASHTAG))), |v| {
198        FooterSeparator::from_str(v)
199    })(i)
200}
201
202/// A footer identifier is used to detect footers inside of a commit message.
203///
204/// The identifier is make out of a footer token followed by a footer separator.
205type FooterIdentifier<'a> = (&'a str, FooterSeparator);
206
207/// Parses a footer identifier.
208fn footer_identifier<'a, E: ParseError<&'a str>>(
209    i: &'a str,
210) -> IResult<&'a str, FooterIdentifier<'a>, E> {
211    tuple((footer_token, footer_separator))(i)
212}
213
214/// Parses a footer value.
215///
216/// A footer value is terminated by the next footer identifier.
217///
218/// # Specification
219///
220/// 10. A footer’s value MAY contain spaces and newlines, and parsing MUST
221/// terminate when the next valid footer token/separator pair is observed.
222fn footer_value<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str, E> {
223    let mut offset_to_split_off = 0usize;
224    for line in i.lines() {
225        // Check if the next line starts a new footer
226        if peek::<_, _, E, _>(footer_identifier)(line).is_ok() {
227            offset_to_split_off += 1;
228            break;
229        }
230
231        offset_to_split_off += line.chars().count() + 1;
232    }
233
234    map(take(offset_to_split_off - 1), str::trim_end)(i)
235}
236
237type FooterType<'a> = (&'a str, FooterSeparator, &'a str);
238
239/// Parses a single footer entry.
240///
241/// # Specification
242///
243/// 8. One or more footers MAY be provided one blank line after the body. Each footer MUST consist of a word token, followed by either a :<space> or <space># separator, followed by a string value (this is inspired by the [git trailer convention](https://git-scm.com/docs/git-interpret-trailers)).
244fn footer<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, FooterType<'a>, E> {
245    tuple((footer_token, footer_separator, footer_value))(i)
246}
247
248/// The first line of a commit.
249///
250/// These values MUST be included as defined in the specification.
251///
252/// # Parameters
253///
254/// - The commit type.
255/// - The optional commit scope.
256/// - The optional exclamation mark.
257/// - The commit description.
258type CommitFirstLine<'a> = (&'a str, Option<&'a str>, Option<&'a str>, &'a str);
259
260/// Parses all mandatory parts of a commit.
261///
262/// # Specification
263///
264/// 1) Commits MUST be prefixed with a type, which consists of a noun, `feat`,
265/// `fix`, etc., followed by the OPTIONAL scope, OPTIONAL `!`, and REQUIRED
266/// terminal colon and space.
267///
268/// 5) A description MUST immediately follow the colon and space after the
269/// type/scope prefix. The description is a short summary of the code changes,
270/// e.g., `fix: array parsing issue when multiple spaces were contained in
271/// string`.
272fn commit<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, CommitFirstLine<'a>, E> {
273    map(
274        tuple((
275            r#type,
276            opt(scope),
277            opt(exclamation_mark),
278            colon_separator,
279            description,
280        )),
281        |(ty, scope, exclamation_mark, _, description)| (ty, scope, exclamation_mark, description),
282    )(i)
283}
284
285/// Parses the footer section.
286fn footers<'a, E: ParseError<&'a str>>(
287    i: &'a str,
288) -> IResult<&'a str, Vec<(&'a str, FooterSeparator, &'a str)>, E> {
289    //many0(preceded(opt(line_ending), footer))(i)
290    many0(footer)(i)
291}
292
293/// Parses a complete commit with all optional parts.
294fn commit_complete<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, Commit<'a>, E> {
295    map(
296        tuple((
297            context("First line", commit),
298            context("Optional body", |i| {
299                // The body is separated by one empty line. However, the first line parser does
300                // not consume the newline after the description. This has to be done now.
301                let (rest, line_end) = opt(line_ending::<_, E>)(i)?;
302                if line_end.is_none() {
303                    // No new line has been found, so the commit message only contains a
304                    // description.
305                    return Ok((i, None));
306                }
307
308                let (rest, optional_body) = opt::<_, _, E, _>(preceded(line_ending, body))(rest)?;
309
310                // XXX: maybe this can be done better. Not sure how exactly though. The double
311                // option feels hacky and as far as I can tell, None doesn't happen anyway as we
312                // check it already early on.
313                match optional_body {
314                    // If None than no body has been found, i.e. only a description. In this case
315                    // return the original input as the rest.
316                    None => Ok((i, None)),
317                    Some(inner_optional) => {
318                        // If the inner value is None than no body has been found.
319                        match inner_optional {
320                            None => Ok((i, None)),
321                            Some(b) => Ok((rest, Some(b))),
322                        }
323                    }
324                }
325            }),
326            context("Optional footer", |i| {
327                let (rest, line_end) = opt(line_ending::<_, E>)(i)?;
328                if line_end.is_none() {
329                    // No new line has been found, so the commit message only contains a
330                    // description.
331                    return Ok((i, None));
332                }
333
334                opt(preceded(line_ending, footers))(rest)
335            }),
336        )),
337        |(first_line, body, footers)| {
338            let footers = footers.unwrap_or_else(|| vec![]);
339            let footers = footers
340                .iter()
341                .map(|f| Footer::from(f.0, f.1, f.2))
342                .collect::<Vec<_>>();
343            let is_breaking_change =
344                first_line.2.is_some() || footers.iter().any(|f| is_breaking_change_token(f.token));
345
346            Commit::from(
347                first_line.0,
348                first_line.1,
349                first_line.3,
350                body,
351                is_breaking_change,
352                footers,
353            )
354        },
355    )(i)
356}
357
358/// Parses a conventional commit message.
359///
360/// # Returns
361///
362/// `Ok(Commit)` if the parsing was successful, `Err(VerboseError)` if something
363/// went wrong during parsing.
364pub fn parse_commit_msg<'a>(i: &'a str) -> Result<Commit<'a>, VerboseError<&'a str>> {
365    let result = commit_complete::<VerboseError<_>>(i);
366    result
367        .map_err(|err| match err {
368            nom::Err::Error(err) | nom::Err::Failure(err) => {
369                //println!("{}", nom::error::convert_error(i, err.clone()));
370                err
371            }
372            _ => unreachable!(),
373        })
374        .map(|t| t.1)
375}
376
#[cfg(test)]
mod tests {
    use super::{body, description, footer, footer_token, footers, r#type, scope};
    use conventional_commits_types::FooterSeparator;
    use nom::{
        error::{ErrorKind, VerboseError},
        Err::Error,
        IResult,
    };

    /// Expected result of a parser that matched `i` and left `rest` behind.
    fn simple_rest<'a>(rest: &'a str, i: &'a str) -> IResult<&'a str, &'a str> {
        Ok((rest, i))
    }

    /// Expected result of a parser that matched the whole input `i`.
    fn simple_ok(i: &str) -> IResult<&str, &str> {
        simple_rest("", i)
    }

    #[test]
    fn test_ty() {
        // ASCII input, unicode input, and a non-alphabetic char that stops
        // the parser.
        let cases = [
            ("type", simple_ok("type")),
            ("日本", simple_ok("日本")),
            ("日本\n", simple_rest("\n", "日本")),
        ];

        for &(input, ref expected) in cases.iter() {
            assert_eq!(*expected, r#type(input));
        }
    }

    #[test]
    fn test_scope() {
        // ASCII scope.
        assert_eq!(Ok(("", "scope")), scope::<VerboseError<&str>>("(scope)"));

        // Unicode scope.
        assert_eq!(Ok(("", "日本")), scope::<VerboseError<&str>>("(日本)"));

        // A line break inside the parentheses stops parsing.
        assert_eq!(Err(Error(("\n)", ErrorKind::Tag))), scope("(日本\n)"));

        // A missing closing parenthesis fails ...
        assert_eq!(Err(Error(("", ErrorKind::Tag))), scope("(scope"));

        // ... and so does a missing opening one.
        assert_eq!(Err(Error(("scope)", ErrorKind::Tag))), scope("scope)"));
    }

    #[test]
    fn test_description() {
        // ASCII input, unicode input, and a newline that stops the parser.
        let cases = [
            ("a short description", simple_ok("a short description")),
            ("日本の本が好き", simple_ok("日本の本が好き")),
            (
                "a short description\n",
                simple_rest("\n", "a short description"),
            ),
        ];

        for &(input, ref expected) in cases.iter() {
            assert_eq!(*expected, description(input));
        }
    }

    #[test]
    fn test_body() {
        // Body without footer: everything is consumed.
        let body_only = include_str!("../tests/body_no_footer.txt");
        assert_eq!(
            Ok(("", Some(body_only))),
            body::<VerboseError<&str>>(body_only)
        );

        // Body with footer: the blank line and the footer are left over.
        let with_footer = include_str!("../tests/body_no_footer2.txt");
        assert_eq!(
            Ok(("\n\nFixes #123", Some(body_only))),
            body::<VerboseError<&str>>(with_footer)
        );
    }

    #[test]
    fn test_footer_token() {
        let tokens = ["Fixes", "PR-close", "Signed-off-by", "Signed-off-by-日本"];

        for &token in tokens.iter() {
            assert_eq!(simple_ok(token), footer_token(token));
        }
    }

    #[test]
    fn test_footer() {
        let parsed = footer::<VerboseError<&str>>("Fixes #123");
        assert_eq!(
            Ok(("", ("Fixes", FooterSeparator::SpaceHashTag, "123"))),
            parsed
        );

        // A leading line break is not part of a footer.
        assert!(footer::<VerboseError<&str>>("\nFixes #123").is_err());

        let parsed = footer::<VerboseError<&str>>("Fixes: 123");
        assert_eq!(
            Ok(("", ("Fixes", FooterSeparator::ColonSpace, "123"))),
            parsed
        );

        let parsed = footer::<VerboseError<&str>>("Signed-off-by: me");
        assert_eq!(
            Ok(("", ("Signed-off-by", FooterSeparator::ColonSpace, "me"))),
            parsed
        );

        let parsed = footer::<VerboseError<&str>>("Check-日本: yes");
        assert_eq!(
            Ok(("", ("Check-日本", FooterSeparator::ColonSpace, "yes"))),
            parsed
        );
    }

    #[test]
    fn test_footers() {
        let parsed = footers::<VerboseError<&str>>("Fixes #123\nPR-Close #432");
        let expected = Ok((
            "",
            vec![
                ("Fixes", FooterSeparator::SpaceHashTag, "123"),
                ("PR-Close", FooterSeparator::SpaceHashTag, "432"),
            ],
        ));

        assert_eq!(expected, parsed);
    }

    /// Parses every `*.txt` commit message below `tests/serialized` and
    /// compares the result with the `*.ron` file of the same name next to it.
    #[cfg(feature = "serde")]
    #[test]
    fn test_serialized_commit_messages() -> anyhow::Result<()> {
        use super::parse_commit_msg;
        use conventional_commits_types::Commit;
        use std::path::Path;
        use walkdir::{DirEntry, WalkDir};

        let serialized_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/serialized");
        let commit_files = WalkDir::new(&serialized_dir)
            .contents_first(true)
            .into_iter()
            .filter_entry(|e: &DirEntry| {
                println!("{}", e.path().display());
                e.path().extension().map_or(false, |ext| ext == "txt")
            })
            .filter_map(|e| e.ok());

        for entry in commit_files {
            let stem = entry.path().file_stem();
            let parent = entry.path().parent().expect("failed to get folder parent");
            let ron_path = parent.join(format!("{}.ron", stem.unwrap().to_str().unwrap()));

            // The end of the commit message is trimmed because the command used
            // for exporting the messages appends some newlines.
            let raw_message = std::fs::read_to_string(entry.path())?;
            let message = raw_message.trim_end();
            let expected_ron = std::fs::read_to_string(ron_path)?;

            let commit = parse_commit_msg(message).expect("parse commit");
            let expected: Commit<'_> = ron::from_str(&expected_ron)?;

            // Left: commit from the ron file, right: freshly parsed commit.
            assert_eq!(expected, commit, "failed at: {:?}", &stem);
            println!("right assert");
        }

        Ok(())
    }
}