patch_apply/
parser.rs

1use std::borrow::Cow;
2use std::error::Error;
3
4use chrono::DateTime;
5use nom::*;
6use nom::{
7    branch::alt,
8    bytes::complete::{is_not, tag, take_until},
9    character::complete::{char, digit1, line_ending, none_of, not_line_ending, one_of},
10    combinator::{map, not, opt},
11    multi::{many0, many1},
12    sequence::{delimited, preceded, terminated, tuple},
13};
14
15use crate::ast::*;
16
17type Input<'a> = nom_locate::LocatedSpan<&'a str>;
18
19/// Type returned when an error occurs while parsing a patch
20#[derive(Debug, Clone)]
21pub struct ParseError<'a> {
22    /// The line where the parsing error occurred
23    pub line: u32,
24    /// The offset within the input where the parsing error occurred
25    pub offset: usize,
26    /// The failed input
27    pub fragment: &'a str,
28    /// The actual parsing error
29    pub kind: nom::error::ErrorKind,
30}
31
32#[doc(hidden)]
33impl<'a> From<nom::Err<nom::error::Error<Input<'a>>>> for ParseError<'a> {
34    fn from(err: nom::Err<nom::error::Error<Input<'a>>>) -> Self {
35        match err {
36            nom::Err::Incomplete(_) => unreachable!("bug: parser should not return incomplete"),
37            // Unify both error types because at this point the error is not recoverable
38            nom::Err::Error(error) | nom::Err::Failure(error) => Self {
39                line: error.input.location_line(),
40                offset: error.input.location_offset(),
41                fragment: error.input.fragment(),
42                kind: error.code,
43            },
44        }
45    }
46}
47
48impl<'a> std::fmt::Display for ParseError<'a> {
49    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
50        write!(
51            f,
52            "Line {}: Error while parsing: {}",
53            self.line, self.fragment
54        )
55    }
56}
57
58impl<'a> Error for ParseError<'a> {
59    fn description(&self) -> &str {
60        self.kind.description()
61    }
62}
63
64fn consume_content_line(input: Input<'_>) -> IResult<Input<'_>, &str> {
65    let (input, raw) = terminated(not_line_ending, line_ending)(input)?;
66    Ok((input, raw.fragment()))
67}
68
69pub(crate) fn parse_single_patch(s: &str) -> Result<Patch, ParseError<'_>> {
70    let (remaining_input, patch) = patch(Input::new(s))?;
71    // Parser should return an error instead of producing remaining input
72    assert!(
73        remaining_input.fragment().is_empty(),
74        "bug: failed to parse entire input. \
75        Remaining: '{}'",
76        remaining_input.fragment()
77    );
78    Ok(patch)
79}
80
81pub(crate) fn parse_multiple_patches(s: &str) -> Result<Vec<Patch>, ParseError<'_>> {
82    let (remaining_input, patches) = multiple_patches(Input::new(s))?;
83    // Parser should return an error instead of producing remaining input
84    assert!(
85        remaining_input.fragment().is_empty(),
86        "bug: failed to parse entire input. \
87        Remaining: '{}'",
88        remaining_input.fragment()
89    );
90    Ok(patches)
91}
92
93fn multiple_patches(input: Input<'_>) -> IResult<Input<'_>, Vec<Patch>> {
94    many1(patch)(input)
95}
96
97fn patch(input: Input<'_>) -> IResult<Input<'_>, Patch> {
98    let (input, files) = headers(input)?;
99    let (input, hunks) = chunks(input)?;
100    // let (input, no_newline_indicator) = no_newline_indicator(input)?;
101    // Ignore trailing empty lines produced by some diff programs
102    let (input, _) = many0(line_ending)(input)?;
103
104    let (old, new) = files;
105    Ok((
106        input,
107        Patch {
108            old,
109            new,
110            hunks,
111            // end_newline: !no_newline_indicator,
112        },
113    ))
114}
115
116// Header lines
117fn headers(input: Input<'_>) -> IResult<Input<'_>, (File, File)> {
118    // Ignore any preamble lines in produced diffs
119    let (input, _) = take_until("---")(input)?;
120    let (input, _) = tag("--- ")(input)?;
121    let (input, oldfile) = header_line_content(input)?;
122    let (input, _) = line_ending(input)?;
123    let (input, _) = tag("+++ ")(input)?;
124    let (input, newfile) = header_line_content(input)?;
125    let (input, _) = line_ending(input)?;
126    Ok((input, (oldfile, newfile)))
127}
128
129fn header_line_content(input: Input<'_>) -> IResult<Input<'_>, File> {
130    let (input, filename) = filename(input)?;
131    let (input, after) = opt(preceded(char('\t'), file_metadata))(input)?;
132
133    Ok((
134        input,
135        File {
136            path: filename,
137            meta: after.and_then(|after| match after {
138                Cow::Borrowed("") => None,
139                Cow::Borrowed("\t") => None,
140                _ => Some(
141                    DateTime::parse_from_str(after.as_ref(), "%F %T%.f %z")
142                        .or_else(|_| DateTime::parse_from_str(after.as_ref(), "%F %T %z"))
143                        .ok()
144                        .map_or_else(|| FileMetadata::Other(after), FileMetadata::DateTime),
145                ),
146            }),
147        },
148    ))
149}
150
151// Hunks of the file differences
152fn chunks(input: Input<'_>) -> IResult<Input<'_>, Vec<Hunk>> {
153    many1(chunk)(input)
154}
155
156fn chunk(input: Input<'_>) -> IResult<Input<'_>, Hunk> {
157    let (input, ranges) = chunk_header(input)?;
158    let (input, lines) = many1(chunk_line)(input)?;
159
160    let (old_range, new_range, range_hint) = ranges;
161    Ok((
162        input,
163        Hunk {
164            old_range,
165            new_range,
166            range_hint,
167            lines,
168        },
169    ))
170}
171
172fn chunk_header(input: Input<'_>) -> IResult<Input<'_>, (Range, Range, &'_ str)> {
173    let (input, _) = tag("@@ -")(input)?;
174    let (input, old_range) = range(input)?;
175    let (input, _) = tag(" +")(input)?;
176    let (input, new_range) = range(input)?;
177    let (input, _) = tag(" @@")(input)?;
178
179    // Save hint provided after @@ (git sometimes adds this)
180    let (input, range_hint) = not_line_ending(input)?;
181    let (input, _) = line_ending(input)?;
182    Ok((input, (old_range, new_range, &range_hint)))
183}
184
185fn range(input: Input<'_>) -> IResult<Input<'_>, Range> {
186    let (input, start) = u64_digit(input)?;
187    let (input, count) = opt(preceded(char(','), u64_digit))(input)?;
188    let count = count.unwrap_or(1);
189    Ok((input, Range { start, count }))
190}
191
192fn u64_digit(input: Input<'_>) -> IResult<Input<'_>, u64> {
193    let (input, digits) = digit1(input)?;
194    let num = digits.fragment().parse::<u64>().unwrap();
195    Ok((input, num))
196}
197
198// Looks for lines starting with + or - or space, but not +++ or ---. Not a foolproof check.
199//
200// For example, if someone deletes a line that was using the pre-decrement (--) operator or adds a
201// line that was using the pre-increment (++) operator, this will fail.
202//
203// Example where this doesn't work:
204//
205// --- main.c
206// +++ main.c
207// @@ -1,4 +1,7 @@
208// +#include<stdio.h>
209// +
210//  int main() {
211//  double a;
212// --- a;
213// +++ a;
214// +printf("%d\n", a);
215//  }
216//
217// We will fail to parse this entire diff.
218//
219// By checking for `+++ ` instead of just `+++`, we add at least a little more robustness because
220// we know that people typically write `++a`, not `++ a`. That being said, this is still not enough
221// to guarantee correctness in all cases.
222//
223//FIXME: Use the ranges in the chunk header to figure out how many chunk lines to parse. Will need
224// to figure out how to count in nom more robustly than many1!(). Maybe using switch!()?
225//FIXME: The test_parse_triple_plus_minus_hack test will no longer panic when this is fixed.
226fn chunk_line(input: Input<'_>) -> IResult<Input<'_>, Line> {
227    alt((
228        map(
229            preceded(tuple((char('+'), not(tag("++ ")))), consume_content_line),
230            Line::Add,
231        ),
232        map(
233            preceded(tuple((char('-'), not(tag("-- ")))), consume_content_line),
234            Line::Remove,
235        ),
236        map(preceded(char(' '), consume_content_line), Line::Context),
237        map(
238            preceded(tag(NO_NEWLINE_AT_END_OF_FILE), consume_content_line),
239            Line::EndOfFile,
240        ),
241    ))(input)
242}
243
244const NO_NEWLINE_AT_END_OF_FILE: &str = "\\ No newline at end of file";
245
246// Trailing newline indicator
247fn no_newline_indicator(input: Input<'_>) -> IResult<Input<'_>, bool> {
248    map(
249        opt(terminated(tag(NO_NEWLINE_AT_END_OF_FILE), opt(line_ending))),
250        |matched| matched.is_some(),
251    )(input)
252}
253
254fn filename(input: Input<'_>) -> IResult<Input<'_>, Cow<str>> {
255    alt((quoted, bare))(input)
256}
257
258fn file_metadata(input: Input<'_>) -> IResult<Input<'_>, Cow<str>> {
259    alt((
260        quoted,
261        map(not_line_ending, |data: Input<'_>| {
262            Cow::Borrowed(*data.fragment())
263        }),
264    ))(input)
265}
266
267fn quoted(input: Input<'_>) -> IResult<Input<'_>, Cow<str>> {
268    delimited(char('\"'), unescaped_str, char('\"'))(input)
269}
270
271fn bare(input: Input<'_>) -> IResult<Input<'_>, Cow<str>> {
272    map(is_not("\t\r\n"), |data: Input<'_>| {
273        Cow::Borrowed(*data.fragment())
274    })(input)
275}
276
277fn unescaped_str(input: Input<'_>) -> IResult<Input<'_>, Cow<str>> {
278    let (input, raw) = many1(alt((unescaped_char, escaped_char)))(input)?;
279    Ok((input, raw.into_iter().collect::<Cow<str>>()))
280}
281
282// Parses an unescaped character
283fn unescaped_char(input: Input<'_>) -> IResult<Input<'_>, char> {
284    none_of("\0\n\r\t\\\"")(input)
285}
286
287// Parses an escaped character and returns its unescaped equivalent
288fn escaped_char(input: Input<'_>) -> IResult<Input<'_>, char> {
289    map(preceded(char('\\'), one_of(r#"0nrt"\"#)), |ch| match ch {
290        '0' => '\0',
291        'n' => '\n',
292        'r' => '\r',
293        't' => '\t',
294        '"' => '"',
295        '\\' => '\\',
296        _ => unreachable!(),
297    })(input)
298}
299
300#[cfg(test)]
301mod tests {
302    use super::*;
303
304    use pretty_assertions::assert_eq;
305
306    type ParseResult<'a, T> = Result<T, nom::Err<nom::error::Error<Input<'a>>>>;
307
308    // Using a macro instead of a function so that error messages cite the most helpful line number
309    macro_rules! test_parser {
310        ($parser:ident($input:expr) -> @($expected_remaining_input:expr, $expected:expr $(,)*)) => {
311            let (remaining_input, result) = $parser(Input::new($input))?;
312            assert_eq!(*remaining_input.fragment(), $expected_remaining_input,
313                "unexpected remaining input after parse");
314            assert_eq!(result, $expected);
315        };
316        ($parser:ident($input:expr) -> $expected:expr) => {
317            test_parser!($parser($input) -> @("", $expected));
318        };
319    }
320
321    #[test]
322    fn test_unescape() -> ParseResult<'static, ()> {
323        test_parser!(unescaped_str("file \\\"name\\\"") -> "file \"name\"".to_string());
324        Ok(())
325    }
326
327    #[test]
328    fn test_quoted() -> ParseResult<'static, ()> {
329        test_parser!(quoted("\"file name\"") -> "file name".to_string());
330        Ok(())
331    }
332
333    #[test]
334    fn test_bare() -> ParseResult<'static, ()> {
335        test_parser!(bare("file-name ") -> @("", "file-name ".to_string()));
336        test_parser!(bare("file-name\t") -> @("\t", "file-name".to_string()));
337        test_parser!(bare("file-name\n") -> @("\n", "file-name".to_string()));
338        Ok(())
339    }
340
341    #[test]
342    fn test_filename() -> ParseResult<'static, ()> {
343        // bare
344        test_parser!(filename("asdf\t") -> @("\t", "asdf".to_string()));
345
346        // quoted
347        test_parser!(filename(r#""a/My Project/src/foo.rs" "#) -> @(" ", "a/My Project/src/foo.rs".to_string()));
348        test_parser!(filename(r#""\"asdf\" fdsh \\\t\r" "#) -> @(" ", "\"asdf\" fdsh \\\t\r".to_string()));
349        test_parser!(filename(r#""a s\"\nd\0f" "#) -> @(" ", "a s\"\nd\0f".to_string()));
350        Ok(())
351    }
352
353    #[test]
354    fn test_header_line_contents() -> ParseResult<'static, ()> {
355        test_parser!(header_line_content("lao\n") -> @("\n", File {
356            path: "lao".into(),
357            meta: None,
358        }));
359
360        test_parser!(header_line_content("lao\t2002-02-21 23:30:39.942229878 -0800\n") -> @(
361            "\n",
362            File {
363                path: "lao".into(),
364                meta: Some(FileMetadata::DateTime(
365                    DateTime::parse_from_rfc3339("2002-02-21T23:30:39.942229878-08:00").unwrap()
366                )),
367            },
368        ));
369
370        test_parser!(header_line_content("lao\t2002-02-21 23:30:39 -0800\n") -> @(
371            "\n",
372            File {
373                path: "lao".into(),
374                meta: Some(FileMetadata::DateTime(
375                    DateTime::parse_from_rfc3339("2002-02-21T23:30:39-08:00").unwrap()
376                )),
377            },
378        ));
379
380        test_parser!(header_line_content("lao\t08f78e0addd5bf7b7aa8887e406493e75e8d2b55\n") -> @(
381            "\n",
382            File {
383                path: "lao".into(),
384                meta: Some(FileMetadata::Other("08f78e0addd5bf7b7aa8887e406493e75e8d2b55".into()))
385            },
386        ));
387        Ok(())
388    }
389
390    #[test]
391    fn test_headers() -> ParseResult<'static, ()> {
392        let sample = "\
393--- lao	2002-02-21 23:30:39.942229878 -0800
394+++ tzu	2002-02-21 23:30:50.442260588 -0800\n";
395        test_parser!(headers(sample) -> (
396            File {
397                path: "lao".into(),
398                meta: Some(FileMetadata::DateTime(
399                    DateTime::parse_from_rfc3339("2002-02-21T23:30:39.942229878-08:00").unwrap()
400                )),
401            },
402            File {
403                path: "tzu".into(),
404                meta: Some(FileMetadata::DateTime(
405                    DateTime::parse_from_rfc3339("2002-02-21T23:30:50.442260588-08:00").unwrap()
406                )),
407            },
408        ));
409
410        let sample2 = "\
411--- lao
412+++ tzu\n";
413        test_parser!(headers(sample2) -> (
414            File {path: "lao".into(), meta: None},
415            File {path: "tzu".into(), meta: None},
416        ));
417
418        let sample2b = "\
419--- lao	
420+++ tzu	\n";
421        test_parser!(headers(sample2b) -> (
422            File {path: "lao".into(), meta: None},
423            File {path: "tzu".into(), meta: None},
424        ));
425
426        let sample3 = "\
427--- lao	08f78e0addd5bf7b7aa8887e406493e75e8d2b55
428+++ tzu	e044048282ce75186ecc7a214fd3d9ba478a2816\n";
429        test_parser!(headers(sample3) -> (
430            File {
431                path: "lao".into(),
432                meta: Some(FileMetadata::Other("08f78e0addd5bf7b7aa8887e406493e75e8d2b55".into())),
433            },
434            File {
435                path: "tzu".into(),
436                meta: Some(FileMetadata::Other("e044048282ce75186ecc7a214fd3d9ba478a2816".into())),
437            },
438        ));
439        Ok(())
440    }
441
442    #[test]
443    fn test_headers_crlf() -> ParseResult<'static, ()> {
444        let sample = "\
445--- lao	2002-02-21 23:30:39.942229878 -0800\r
446+++ tzu	2002-02-21 23:30:50.442260588 -0800\r\n";
447        test_parser!(headers(sample) -> (
448            File {
449                path: "lao".into(),
450                meta: Some(FileMetadata::DateTime(
451                    DateTime::parse_from_rfc3339("2002-02-21T23:30:39.942229878-08:00").unwrap()
452                )),
453            },
454            File {
455                path: "tzu".into(),
456                meta: Some(FileMetadata::DateTime(
457                    DateTime::parse_from_rfc3339("2002-02-21T23:30:50.442260588-08:00").unwrap()
458                )),
459            },
460        ));
461        Ok(())
462    }
463
464    #[test]
465    fn test_range() -> ParseResult<'static, ()> {
466        test_parser!(range("1,7") -> Range { start: 1, count: 7 });
467
468        test_parser!(range("2") -> Range { start: 2, count: 1 });
469        Ok(())
470    }
471
472    #[test]
473    fn test_chunk_header() -> ParseResult<'static, ()> {
474        test_parser!(chunk_header("@@ -1,7 +1,6 @@ foo bar\n") -> (
475            Range { start: 1, count: 7 },
476            Range { start: 1, count: 6 },
477            " foo bar",
478        ));
479        Ok(())
480    }
481
482    #[test]
483    fn test_chunk() -> ParseResult<'static, ()> {
484        let sample = "\
485@@ -1,7 +1,6 @@
486-The Way that can be told of is not the eternal Way;
487-The name that can be named is not the eternal name.
488 The Nameless is the origin of Heaven and Earth;
489-The Named is the mother of all things.
490+The named is the mother of all things.
491+
492 Therefore let there always be non-being,
493   so we may see their subtlety,
494 And let there always be being,\n";
495        let expected = Hunk {
496            old_range: Range { start: 1, count: 7 },
497            new_range: Range { start: 1, count: 6 },
498            range_hint: "",
499            lines: vec![
500                Line::Remove("The Way that can be told of is not the eternal Way;"),
501                Line::Remove("The name that can be named is not the eternal name."),
502                Line::Context("The Nameless is the origin of Heaven and Earth;"),
503                Line::Remove("The Named is the mother of all things."),
504                Line::Add("The named is the mother of all things."),
505                Line::Add(""),
506                Line::Context("Therefore let there always be non-being,"),
507                Line::Context("  so we may see their subtlety,"),
508                Line::Context("And let there always be being,"),
509            ],
510        };
511        test_parser!(chunk(sample) -> expected);
512        Ok(())
513    }
514
515    #[test]
516    fn test_patch() -> ParseResult<'static, ()> {
517        // https://www.gnu.org/software/diffutils/manual/html_node/Example-Unified.html
518        let sample = "\
519--- lao	2002-02-21 23:30:39.942229878 -0800
520+++ tzu	2002-02-21 23:30:50.442260588 -0800
521@@ -1,7 +1,6 @@
522-The Way that can be told of is not the eternal Way;
523-The name that can be named is not the eternal name.
524 The Nameless is the origin of Heaven and Earth;
525-The Named is the mother of all things.
526+The named is the mother of all things.
527+
528 Therefore let there always be non-being,
529   so we may see their subtlety,
530 And let there always be being,
531@@ -9,3 +8,6 @@
532 The two are the same,
533 But after they are produced,
534   they have different names.
535+They both may be called deep and profound.
536+Deeper and more profound,
537+The door of all subtleties!\n";
538
539        let expected = Patch {
540            old: File {
541                path: "lao".into(),
542                meta: Some(FileMetadata::DateTime(
543                    DateTime::parse_from_rfc3339("2002-02-21T23:30:39.942229878-08:00").unwrap(),
544                )),
545            },
546            new: File {
547                path: "tzu".into(),
548                meta: Some(FileMetadata::DateTime(
549                    DateTime::parse_from_rfc3339("2002-02-21T23:30:50.442260588-08:00").unwrap(),
550                )),
551            },
552            hunks: vec![
553                Hunk {
554                    old_range: Range { start: 1, count: 7 },
555                    new_range: Range { start: 1, count: 6 },
556                    range_hint: "",
557                    lines: vec![
558                        Line::Remove("The Way that can be told of is not the eternal Way;"),
559                        Line::Remove("The name that can be named is not the eternal name."),
560                        Line::Context("The Nameless is the origin of Heaven and Earth;"),
561                        Line::Remove("The Named is the mother of all things."),
562                        Line::Add("The named is the mother of all things."),
563                        Line::Add(""),
564                        Line::Context("Therefore let there always be non-being,"),
565                        Line::Context("  so we may see their subtlety,"),
566                        Line::Context("And let there always be being,"),
567                    ],
568                },
569                Hunk {
570                    old_range: Range { start: 9, count: 3 },
571                    new_range: Range { start: 8, count: 6 },
572                    range_hint: "",
573                    lines: vec![
574                        Line::Context("The two are the same,"),
575                        Line::Context("But after they are produced,"),
576                        Line::Context("  they have different names."),
577                        Line::Add("They both may be called deep and profound."),
578                        Line::Add("Deeper and more profound,"),
579                        Line::Add("The door of all subtleties!"),
580                    ],
581                },
582            ],
583            // end_newline: true,
584        };
585
586        test_parser!(patch(sample) -> expected);
587
588        assert_eq!(format!("{}\n", expected), sample);
589
590        Ok(())
591    }
592}