1use std::borrow::Cow;
2use std::error::Error;
3
4use chrono::DateTime;
5use nom::*;
6use nom::{
7 branch::alt,
8 bytes::complete::{is_not, tag, take_until},
9 character::complete::{char, digit1, line_ending, none_of, not_line_ending, one_of},
10 combinator::{map, not, opt},
11 multi::{many0, many1},
12 sequence::{delimited, preceded, terminated, tuple},
13};
14
15use crate::ast::*;
16
/// Parser input: the source text wrapped in a `nom_locate` span, which the
/// error conversion in this file queries for line and offset information.
type Input<'a> = nom_locate::LocatedSpan<&'a str>;
18
/// Error returned when a patch could not be parsed.
#[derive(Debug, Clone)]
pub struct ParseError<'a> {
    /// Line number reported by the input span where parsing failed.
    pub line: u32,
    /// Offset reported by the input span where parsing failed.
    pub offset: usize,
    /// The input that remained unparsed at the point of failure.
    pub fragment: &'a str,
    /// The kind of `nom` error that was raised.
    pub kind: nom::error::ErrorKind,
}
31
32#[doc(hidden)]
33impl<'a> From<nom::Err<nom::error::Error<Input<'a>>>> for ParseError<'a> {
34 fn from(err: nom::Err<nom::error::Error<Input<'a>>>) -> Self {
35 match err {
36 nom::Err::Incomplete(_) => unreachable!("bug: parser should not return incomplete"),
37 nom::Err::Error(error) | nom::Err::Failure(error) => Self {
39 line: error.input.location_line(),
40 offset: error.input.location_offset(),
41 fragment: error.input.fragment(),
42 kind: error.code,
43 },
44 }
45 }
46}
47
48impl<'a> std::fmt::Display for ParseError<'a> {
49 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
50 write!(
51 f,
52 "Line {}: Error while parsing: {}",
53 self.line, self.fragment
54 )
55 }
56}
57
58impl<'a> Error for ParseError<'a> {
59 fn description(&self) -> &str {
60 self.kind.description()
61 }
62}
63
64fn consume_content_line(input: Input<'_>) -> IResult<Input<'_>, &str> {
65 let (input, raw) = terminated(not_line_ending, line_ending)(input)?;
66 Ok((input, raw.fragment()))
67}
68
69pub(crate) fn parse_single_patch(s: &str) -> Result<Patch, ParseError<'_>> {
70 let (remaining_input, patch) = patch(Input::new(s))?;
71 assert!(
73 remaining_input.fragment().is_empty(),
74 "bug: failed to parse entire input. \
75 Remaining: '{}'",
76 remaining_input.fragment()
77 );
78 Ok(patch)
79}
80
81pub(crate) fn parse_multiple_patches(s: &str) -> Result<Vec<Patch>, ParseError<'_>> {
82 let (remaining_input, patches) = multiple_patches(Input::new(s))?;
83 assert!(
85 remaining_input.fragment().is_empty(),
86 "bug: failed to parse entire input. \
87 Remaining: '{}'",
88 remaining_input.fragment()
89 );
90 Ok(patches)
91}
92
93fn multiple_patches(input: Input<'_>) -> IResult<Input<'_>, Vec<Patch>> {
94 many1(patch)(input)
95}
96
97fn patch(input: Input<'_>) -> IResult<Input<'_>, Patch> {
98 let (input, files) = headers(input)?;
99 let (input, hunks) = chunks(input)?;
100 let (input, _) = many0(line_ending)(input)?;
103
104 let (old, new) = files;
105 Ok((
106 input,
107 Patch {
108 old,
109 new,
110 hunks,
111 },
113 ))
114}
115
116fn headers(input: Input<'_>) -> IResult<Input<'_>, (File, File)> {
118 let (input, _) = take_until("---")(input)?;
120 let (input, _) = tag("--- ")(input)?;
121 let (input, oldfile) = header_line_content(input)?;
122 let (input, _) = line_ending(input)?;
123 let (input, _) = tag("+++ ")(input)?;
124 let (input, newfile) = header_line_content(input)?;
125 let (input, _) = line_ending(input)?;
126 Ok((input, (oldfile, newfile)))
127}
128
129fn header_line_content(input: Input<'_>) -> IResult<Input<'_>, File> {
130 let (input, filename) = filename(input)?;
131 let (input, after) = opt(preceded(char('\t'), file_metadata))(input)?;
132
133 Ok((
134 input,
135 File {
136 path: filename,
137 meta: after.and_then(|after| match after {
138 Cow::Borrowed("") => None,
139 Cow::Borrowed("\t") => None,
140 _ => Some(
141 DateTime::parse_from_str(after.as_ref(), "%F %T%.f %z")
142 .or_else(|_| DateTime::parse_from_str(after.as_ref(), "%F %T %z"))
143 .ok()
144 .map_or_else(|| FileMetadata::Other(after), FileMetadata::DateTime),
145 ),
146 }),
147 },
148 ))
149}
150
151fn chunks(input: Input<'_>) -> IResult<Input<'_>, Vec<Hunk>> {
153 many1(chunk)(input)
154}
155
156fn chunk(input: Input<'_>) -> IResult<Input<'_>, Hunk> {
157 let (input, ranges) = chunk_header(input)?;
158 let (input, lines) = many1(chunk_line)(input)?;
159
160 let (old_range, new_range, range_hint) = ranges;
161 Ok((
162 input,
163 Hunk {
164 old_range,
165 new_range,
166 range_hint,
167 lines,
168 },
169 ))
170}
171
172fn chunk_header(input: Input<'_>) -> IResult<Input<'_>, (Range, Range, &'_ str)> {
173 let (input, _) = tag("@@ -")(input)?;
174 let (input, old_range) = range(input)?;
175 let (input, _) = tag(" +")(input)?;
176 let (input, new_range) = range(input)?;
177 let (input, _) = tag(" @@")(input)?;
178
179 let (input, range_hint) = not_line_ending(input)?;
181 let (input, _) = line_ending(input)?;
182 Ok((input, (old_range, new_range, &range_hint)))
183}
184
185fn range(input: Input<'_>) -> IResult<Input<'_>, Range> {
186 let (input, start) = u64_digit(input)?;
187 let (input, count) = opt(preceded(char(','), u64_digit))(input)?;
188 let count = count.unwrap_or(1);
189 Ok((input, Range { start, count }))
190}
191
192fn u64_digit(input: Input<'_>) -> IResult<Input<'_>, u64> {
193 let (input, digits) = digit1(input)?;
194 let num = digits.fragment().parse::<u64>().unwrap();
195 Ok((input, num))
196}
197
198fn chunk_line(input: Input<'_>) -> IResult<Input<'_>, Line> {
227 alt((
228 map(
229 preceded(tuple((char('+'), not(tag("++ ")))), consume_content_line),
230 Line::Add,
231 ),
232 map(
233 preceded(tuple((char('-'), not(tag("-- ")))), consume_content_line),
234 Line::Remove,
235 ),
236 map(preceded(char(' '), consume_content_line), Line::Context),
237 map(
238 preceded(tag(NO_NEWLINE_AT_END_OF_FILE), consume_content_line),
239 Line::EndOfFile,
240 ),
241 ))(input)
242}
243
/// Text of the marker line matched by `chunk_line` and `no_newline_indicator`.
const NO_NEWLINE_AT_END_OF_FILE: &str = "\\ No newline at end of file";
245
246fn no_newline_indicator(input: Input<'_>) -> IResult<Input<'_>, bool> {
248 map(
249 opt(terminated(tag(NO_NEWLINE_AT_END_OF_FILE), opt(line_ending))),
250 |matched| matched.is_some(),
251 )(input)
252}
253
254fn filename(input: Input<'_>) -> IResult<Input<'_>, Cow<str>> {
255 alt((quoted, bare))(input)
256}
257
258fn file_metadata(input: Input<'_>) -> IResult<Input<'_>, Cow<str>> {
259 alt((
260 quoted,
261 map(not_line_ending, |data: Input<'_>| {
262 Cow::Borrowed(*data.fragment())
263 }),
264 ))(input)
265}
266
267fn quoted(input: Input<'_>) -> IResult<Input<'_>, Cow<str>> {
268 delimited(char('\"'), unescaped_str, char('\"'))(input)
269}
270
271fn bare(input: Input<'_>) -> IResult<Input<'_>, Cow<str>> {
272 map(is_not("\t\r\n"), |data: Input<'_>| {
273 Cow::Borrowed(*data.fragment())
274 })(input)
275}
276
277fn unescaped_str(input: Input<'_>) -> IResult<Input<'_>, Cow<str>> {
278 let (input, raw) = many1(alt((unescaped_char, escaped_char)))(input)?;
279 Ok((input, raw.into_iter().collect::<Cow<str>>()))
280}
281
282fn unescaped_char(input: Input<'_>) -> IResult<Input<'_>, char> {
284 none_of("\0\n\r\t\\\"")(input)
285}
286
287fn escaped_char(input: Input<'_>) -> IResult<Input<'_>, char> {
289 map(preceded(char('\\'), one_of(r#"0nrt"\"#)), |ch| match ch {
290 '0' => '\0',
291 'n' => '\n',
292 'r' => '\r',
293 't' => '\t',
294 '"' => '"',
295 '\\' => '\\',
296 _ => unreachable!(),
297 })(input)
298}
299
#[cfg(test)]
mod tests {
    use super::*;

    use pretty_assertions::assert_eq;

    type ParseResult<'a, T> = Result<T, nom::Err<nom::error::Error<Input<'a>>>>;

    // Runs `$parser` over `$input` and checks both the parsed value and the
    // input left over. The short form expects the whole input to be consumed;
    // the `@(remaining, expected)` form states the leftover explicitly.
    macro_rules! test_parser {
        ($parser:ident($input:expr) -> @($expected_remaining_input:expr, $expected:expr $(,)*)) => {
            let (remaining_input, result) = $parser(Input::new($input))?;
            assert_eq!(*remaining_input.fragment(), $expected_remaining_input,
                "unexpected remaining input after parse");
            assert_eq!(result, $expected);
        };
        ($parser:ident($input:expr) -> $expected:expr) => {
            test_parser!($parser($input) -> @("", $expected));
        };
    }

    #[test]
    fn test_unescape() -> ParseResult<'static, ()> {
        test_parser!(unescaped_str("file \\\"name\\\"") -> "file \"name\"".to_string());
        Ok(())
    }

    #[test]
    fn test_quoted() -> ParseResult<'static, ()> {
        test_parser!(quoted("\"file name\"") -> "file name".to_string());
        Ok(())
    }

    #[test]
    fn test_bare() -> ParseResult<'static, ()> {
        test_parser!(bare("file-name ") -> @("", "file-name ".to_string()));
        test_parser!(bare("file-name\t") -> @("\t", "file-name".to_string()));
        test_parser!(bare("file-name\n") -> @("\n", "file-name".to_string()));
        Ok(())
    }

    #[test]
    fn test_filename() -> ParseResult<'static, ()> {
        test_parser!(filename("asdf\t") -> @("\t", "asdf".to_string()));

        test_parser!(filename(r#""a/My Project/src/foo.rs" "#) -> @(" ", "a/My Project/src/foo.rs".to_string()));
        test_parser!(filename(r#""\"asdf\" fdsh \\\t\r" "#) -> @(" ", "\"asdf\" fdsh \\\t\r".to_string()));
        test_parser!(filename(r#""a s\"\nd\0f" "#) -> @(" ", "a s\"\nd\0f".to_string()));
        Ok(())
    }

    #[test]
    fn test_header_line_contents() -> ParseResult<'static, ()> {
        test_parser!(header_line_content("lao\n") -> @("\n", File {
            path: "lao".into(),
            meta: None,
        }));

        test_parser!(header_line_content("lao\t2002-02-21 23:30:39.942229878 -0800\n") -> @(
            "\n",
            File {
                path: "lao".into(),
                meta: Some(FileMetadata::DateTime(
                    DateTime::parse_from_rfc3339("2002-02-21T23:30:39.942229878-08:00").unwrap()
                )),
            },
        ));

        test_parser!(header_line_content("lao\t2002-02-21 23:30:39 -0800\n") -> @(
            "\n",
            File {
                path: "lao".into(),
                meta: Some(FileMetadata::DateTime(
                    DateTime::parse_from_rfc3339("2002-02-21T23:30:39-08:00").unwrap()
                )),
            },
        ));

        test_parser!(header_line_content("lao\t08f78e0addd5bf7b7aa8887e406493e75e8d2b55\n") -> @(
            "\n",
            File {
                path: "lao".into(),
                meta: Some(FileMetadata::Other("08f78e0addd5bf7b7aa8887e406493e75e8d2b55".into()))
            },
        ));
        Ok(())
    }

    // The filename and its metadata are separated by a TAB (the parser only
    // accepts `\t` there), written as an explicit escape so it cannot be
    // silently turned into a space.
    #[test]
    fn test_headers() -> ParseResult<'static, ()> {
        let sample = "\
--- lao\t2002-02-21 23:30:39.942229878 -0800
+++ tzu\t2002-02-21 23:30:50.442260588 -0800\n";
        test_parser!(headers(sample) -> (
            File {
                path: "lao".into(),
                meta: Some(FileMetadata::DateTime(
                    DateTime::parse_from_rfc3339("2002-02-21T23:30:39.942229878-08:00").unwrap()
                )),
            },
            File {
                path: "tzu".into(),
                meta: Some(FileMetadata::DateTime(
                    DateTime::parse_from_rfc3339("2002-02-21T23:30:50.442260588-08:00").unwrap()
                )),
            },
        ));

        let sample2 = "\
--- lao
+++ tzu\n";
        test_parser!(headers(sample2) -> (
            File {path: "lao".into(), meta: None},
            File {path: "tzu".into(), meta: None},
        ));

        // A trailing TAB with no metadata after it must still yield `meta: None`.
        let sample2b = "\
--- lao
+++ tzu\t\n";
        test_parser!(headers(sample2b) -> (
            File {path: "lao".into(), meta: None},
            File {path: "tzu".into(), meta: None},
        ));

        let sample3 = "\
--- lao\t08f78e0addd5bf7b7aa8887e406493e75e8d2b55
+++ tzu\te044048282ce75186ecc7a214fd3d9ba478a2816\n";
        test_parser!(headers(sample3) -> (
            File {
                path: "lao".into(),
                meta: Some(FileMetadata::Other("08f78e0addd5bf7b7aa8887e406493e75e8d2b55".into())),
            },
            File {
                path: "tzu".into(),
                meta: Some(FileMetadata::Other("e044048282ce75186ecc7a214fd3d9ba478a2816".into())),
            },
        ));
        Ok(())
    }

    #[test]
    fn test_headers_crlf() -> ParseResult<'static, ()> {
        let sample = "\
--- lao\t2002-02-21 23:30:39.942229878 -0800\r
+++ tzu\t2002-02-21 23:30:50.442260588 -0800\r\n";
        test_parser!(headers(sample) -> (
            File {
                path: "lao".into(),
                meta: Some(FileMetadata::DateTime(
                    DateTime::parse_from_rfc3339("2002-02-21T23:30:39.942229878-08:00").unwrap()
                )),
            },
            File {
                path: "tzu".into(),
                meta: Some(FileMetadata::DateTime(
                    DateTime::parse_from_rfc3339("2002-02-21T23:30:50.442260588-08:00").unwrap()
                )),
            },
        ));
        Ok(())
    }

    #[test]
    fn test_range() -> ParseResult<'static, ()> {
        test_parser!(range("1,7") -> Range { start: 1, count: 7 });

        test_parser!(range("2") -> Range { start: 2, count: 1 });
        Ok(())
    }

    #[test]
    fn test_chunk_header() -> ParseResult<'static, ()> {
        test_parser!(chunk_header("@@ -1,7 +1,6 @@ foo bar\n") -> (
            Range { start: 1, count: 7 },
            Range { start: 1, count: 6 },
            " foo bar",
        ));
        Ok(())
    }

    // Each content line starts with a one-character marker that the parser
    // strips, so an indented context line carries the marker space plus its
    // own leading space.
    #[test]
    fn test_chunk() -> ParseResult<'static, ()> {
        let sample = "\
@@ -1,7 +1,6 @@
-The Way that can be told of is not the eternal Way;
-The name that can be named is not the eternal name.
 The Nameless is the origin of Heaven and Earth;
-The Named is the mother of all things.
+The named is the mother of all things.
+
 Therefore let there always be non-being,
  so we may see their subtlety,
 And let there always be being,\n";
        let expected = Hunk {
            old_range: Range { start: 1, count: 7 },
            new_range: Range { start: 1, count: 6 },
            range_hint: "",
            lines: vec![
                Line::Remove("The Way that can be told of is not the eternal Way;"),
                Line::Remove("The name that can be named is not the eternal name."),
                Line::Context("The Nameless is the origin of Heaven and Earth;"),
                Line::Remove("The Named is the mother of all things."),
                Line::Add("The named is the mother of all things."),
                Line::Add(""),
                Line::Context("Therefore let there always be non-being,"),
                Line::Context(" so we may see their subtlety,"),
                Line::Context("And let there always be being,"),
            ],
        };
        test_parser!(chunk(sample) -> expected);
        Ok(())
    }

    #[test]
    fn test_patch() -> ParseResult<'static, ()> {
        let sample = "\
--- lao\t2002-02-21 23:30:39.942229878 -0800
+++ tzu\t2002-02-21 23:30:50.442260588 -0800
@@ -1,7 +1,6 @@
-The Way that can be told of is not the eternal Way;
-The name that can be named is not the eternal name.
 The Nameless is the origin of Heaven and Earth;
-The Named is the mother of all things.
+The named is the mother of all things.
+
 Therefore let there always be non-being,
  so we may see their subtlety,
 And let there always be being,
@@ -9,3 +8,6 @@
 The two are the same,
 But after they are produced,
  they have different names.
+They both may be called deep and profound.
+Deeper and more profound,
+The door of all subtleties!\n";

        let expected = Patch {
            old: File {
                path: "lao".into(),
                meta: Some(FileMetadata::DateTime(
                    DateTime::parse_from_rfc3339("2002-02-21T23:30:39.942229878-08:00").unwrap(),
                )),
            },
            new: File {
                path: "tzu".into(),
                meta: Some(FileMetadata::DateTime(
                    DateTime::parse_from_rfc3339("2002-02-21T23:30:50.442260588-08:00").unwrap(),
                )),
            },
            hunks: vec![
                Hunk {
                    old_range: Range { start: 1, count: 7 },
                    new_range: Range { start: 1, count: 6 },
                    range_hint: "",
                    lines: vec![
                        Line::Remove("The Way that can be told of is not the eternal Way;"),
                        Line::Remove("The name that can be named is not the eternal name."),
                        Line::Context("The Nameless is the origin of Heaven and Earth;"),
                        Line::Remove("The Named is the mother of all things."),
                        Line::Add("The named is the mother of all things."),
                        Line::Add(""),
                        Line::Context("Therefore let there always be non-being,"),
                        Line::Context(" so we may see their subtlety,"),
                        Line::Context("And let there always be being,"),
                    ],
                },
                Hunk {
                    old_range: Range { start: 9, count: 3 },
                    new_range: Range { start: 8, count: 6 },
                    range_hint: "",
                    lines: vec![
                        Line::Context("The two are the same,"),
                        Line::Context("But after they are produced,"),
                        Line::Context(" they have different names."),
                        Line::Add("They both may be called deep and profound."),
                        Line::Add("Deeper and more profound,"),
                        Line::Add("The door of all subtleties!"),
                    ],
                },
            ],
        };

        test_parser!(patch(sample) -> expected);

        // Formatting the parsed patch must reproduce the input text.
        assert_eq!(format!("{}\n", expected), sample);

        Ok(())
    }
}