diffy_imara/patch/
parse.rs

1//! Parse a Patch
2
3use super::{Hunk, HunkRange, Line, ESCAPED_CHARS_BYTES, NO_NEWLINE_AT_EOF};
4use crate::{
5    patch::Patch,
6    utils::{LineIter, Text},
7};
8use std::{borrow::Cow, fmt};
9
/// Module-local `Result` alias whose error type defaults to [`ParsePatchError`].
type Result<T, E = ParsePatchError> = std::result::Result<T, E>;
11
/// Error produced when [`Patch::from_str`] (or one of the parsing entry
/// points in this module) cannot make sense of its input.
///
/// The error carries a human-readable description of what went wrong; the
/// description is rendered through the type's `Display` implementation.
///
/// [`Patch::from_str`]: struct.Patch.html#method.from_str
// TODO use a custom error type instead of a Cow
#[derive(Debug)]
pub struct ParsePatchError(Cow<'static, str>);
18
19impl ParsePatchError {
20    fn new<E: Into<Cow<'static, str>>>(e: E) -> Self {
21        Self(e.into())
22    }
23}
24
25impl fmt::Display for ParsePatchError {
26    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
27        write!(f, "error parsing patch: {}", self.0)
28    }
29}
30
// Marks `ParsePatchError` as a standard error type; the empty body means every
// `Error` method keeps its default implementation.
impl std::error::Error for ParsePatchError {}
32
33struct Parser<'a, T: Text + ?Sized> {
34    lines: std::iter::Peekable<LineIter<'a, T>>,
35}
36
37impl<'a, T: Text + ?Sized> Parser<'a, T> {
38    fn new(input: &'a T) -> Self {
39        Self {
40            lines: LineIter::new(input).peekable(),
41        }
42    }
43
44    fn peek(&mut self) -> Option<&&'a T> {
45        self.lines.peek()
46    }
47
48    fn next(&mut self) -> Result<&'a T> {
49        let line = self
50            .lines
51            .next()
52            .ok_or_else(|| ParsePatchError::new("unexpected EOF"))?;
53        Ok(line)
54    }
55}
56
57pub fn parse(input: &str) -> Result<Patch<'_, str>> {
58    let mut parser = Parser::new(input);
59    let header = patch_header(&mut parser)?;
60    let hunks = hunks(&mut parser)?;
61
62    Ok(Patch::new(
63        header.0.map(convert_cow_to_str),
64        header.1.map(convert_cow_to_str),
65        hunks,
66    ))
67}
68
69pub fn parse_bytes(input: &[u8]) -> Result<Patch<'_, [u8]>> {
70    let mut parser = Parser::new(input);
71    let header = patch_header(&mut parser)?;
72    let hunks = hunks(&mut parser)?;
73
74    Ok(Patch::new(header.0, header.1, hunks))
75}
76
// Converts a byte `Cow` into a string `Cow`, keeping the borrowed/owned
// variant as-is. This is only used when the data originated as a utf8 string;
// bytes that are not valid utf8 cause a panic.
fn convert_cow_to_str(cow: Cow<'_, [u8]>) -> Cow<'_, str> {
    match cow {
        Cow::Borrowed(bytes) => Cow::Borrowed(std::str::from_utf8(bytes).unwrap()),
        Cow::Owned(bytes) => Cow::Owned(String::from_utf8(bytes).unwrap()),
    }
}
84
85#[allow(clippy::type_complexity)]
86fn patch_header<'a, T: Text + ToOwned + ?Sized>(
87    parser: &mut Parser<'a, T>,
88) -> Result<(Option<Cow<'a, [u8]>>, Option<Cow<'a, [u8]>>)> {
89    skip_header_preamble(parser)?;
90
91    let mut filename1 = None;
92    let mut filename2 = None;
93
94    while let Some(line) = parser.peek() {
95        if line.starts_with("--- ") {
96            if filename1.is_some() {
97                return Err(ParsePatchError::new("multiple '---' lines"));
98            }
99            filename1 = Some(parse_filename("--- ", parser.next()?)?);
100        } else if line.starts_with("+++ ") {
101            if filename2.is_some() {
102                return Err(ParsePatchError::new("multiple '+++' lines"));
103            }
104            filename2 = Some(parse_filename("+++ ", parser.next()?)?);
105        } else {
106            break;
107        }
108    }
109
110    Ok((filename1, filename2))
111}
112
113// Skip to the first filename header ("--- " or "+++ ") or hunk line,
114// skipping any preamble lines like "diff --git", etc.
115fn skip_header_preamble<T: Text + ?Sized>(parser: &mut Parser<'_, T>) -> Result<()> {
116    while let Some(line) = parser.peek() {
117        if line.starts_with("--- ") | line.starts_with("+++ ") | line.starts_with("@@ ") {
118            break;
119        }
120        parser.next()?;
121    }
122
123    Ok(())
124}
125
126fn parse_filename<'a, T: Text + ToOwned + ?Sized>(
127    prefix: &str,
128    line: &'a T,
129) -> Result<Cow<'a, [u8]>> {
130    let line = line
131        .strip_prefix(prefix)
132        .ok_or_else(|| ParsePatchError::new("unable to parse filename"))?;
133
134    let filename = if let Some((filename, _)) = line.split_at_exclusive("\t") {
135        filename
136    } else if let Some((filename, _)) = line.split_at_exclusive("\n") {
137        filename
138    } else {
139        return Err(ParsePatchError::new("filename unterminated"));
140    };
141
142    let filename = if let Some(quoted) = is_quoted(filename) {
143        escaped_filename(quoted)?
144    } else {
145        unescaped_filename(filename)?
146    };
147
148    Ok(filename)
149}
150
151fn is_quoted<T: Text + ?Sized>(s: &T) -> Option<&T> {
152    s.strip_prefix("\"").and_then(|s| s.strip_suffix("\""))
153}
154
155fn unescaped_filename<T: Text + ToOwned + ?Sized>(filename: &T) -> Result<Cow<'_, [u8]>> {
156    let bytes = filename.as_bytes();
157
158    if bytes.iter().any(|b| ESCAPED_CHARS_BYTES.contains(b)) {
159        return Err(ParsePatchError::new("invalid char in unquoted filename"));
160    }
161
162    Ok(bytes.into())
163}
164
165fn escaped_filename<T: Text + ToOwned + ?Sized>(escaped: &T) -> Result<Cow<'_, [u8]>> {
166    let mut filename = Vec::new();
167
168    let mut chars = escaped.as_bytes().iter().copied();
169    while let Some(c) = chars.next() {
170        if c == b'\\' {
171            let ch = match chars
172                .next()
173                .ok_or_else(|| ParsePatchError::new("expected escaped character"))?
174            {
175                b'n' => b'\n',
176                b't' => b'\t',
177                b'0' => b'\0',
178                b'r' => b'\r',
179                b'\"' => b'\"',
180                b'\\' => b'\\',
181                _ => return Err(ParsePatchError::new("invalid escaped character")),
182            };
183            filename.push(ch);
184        } else if ESCAPED_CHARS_BYTES.contains(&c) {
185            return Err(ParsePatchError::new("invalid unescaped character"));
186        } else {
187            filename.push(c);
188        }
189    }
190
191    Ok(filename.into())
192}
193
194fn verify_hunks_in_order<T: ?Sized>(hunks: &[Hunk<'_, T>]) -> bool {
195    for hunk in hunks.windows(2) {
196        if hunk[0].old_range.end() > hunk[1].old_range.start()
197            || hunk[0].new_range.end() > hunk[1].new_range.start()
198        {
199            return false;
200        }
201    }
202    true
203}
204
205fn hunks<'a, T: Text + ?Sized>(parser: &mut Parser<'a, T>) -> Result<Vec<Hunk<'a, T>>> {
206    let mut hunks = Vec::new();
207    while parser.peek().is_some() {
208        hunks.push(hunk(parser)?);
209    }
210
211    // check and verify that the Hunks are in sorted order and don't overlap
212    if !verify_hunks_in_order(&hunks) {
213        return Err(ParsePatchError::new("Hunks not in order or overlap"));
214    }
215
216    Ok(hunks)
217}
218
219fn hunk<'a, T: Text + ?Sized>(parser: &mut Parser<'a, T>) -> Result<Hunk<'a, T>> {
220    let (range1, range2, function_context) = hunk_header(parser.next()?)?;
221    let lines = hunk_lines(parser)?;
222
223    // check counts of lines to see if they match the ranges in the hunk header
224    let (len1, len2) = super::hunk_lines_count(&lines);
225    if len1 != range1.len || len2 != range2.len {
226        return Err(ParsePatchError::new("Hunk header does not match hunk"));
227    }
228
229    Ok(Hunk::new(range1, range2, function_context, lines))
230}
231
232fn hunk_header<T: Text + ?Sized>(input: &T) -> Result<(HunkRange, HunkRange, Option<&T>)> {
233    let input = input
234        .strip_prefix("@@ ")
235        .ok_or_else(|| ParsePatchError::new("unable to parse hunk header"))?;
236
237    let (ranges, function_context) = input
238        .split_at_exclusive(" @@")
239        .ok_or_else(|| ParsePatchError::new("hunk header unterminated"))?;
240    let function_context = function_context.strip_prefix(" ");
241
242    let (range1, range2) = ranges
243        .split_at_exclusive(" ")
244        .ok_or_else(|| ParsePatchError::new("unable to parse hunk header"))?;
245    let range1 = range(
246        range1
247            .strip_prefix("-")
248            .ok_or_else(|| ParsePatchError::new("unable to parse hunk header"))?,
249    )?;
250    let range2 = range(
251        range2
252            .strip_prefix("+")
253            .ok_or_else(|| ParsePatchError::new("unable to parse hunk header"))?,
254    )?;
255    Ok((range1, range2, function_context))
256}
257
258fn range<T: Text + ?Sized>(s: &T) -> Result<HunkRange> {
259    let (start, len) = if let Some((start, len)) = s.split_at_exclusive(",") {
260        (
261            start
262                .parse()
263                .ok_or_else(|| ParsePatchError::new("can't parse range"))?,
264            len.parse()
265                .ok_or_else(|| ParsePatchError::new("can't parse range"))?,
266        )
267    } else {
268        (
269            s.parse()
270                .ok_or_else(|| ParsePatchError::new("can't parse range"))?,
271            1,
272        )
273    };
274
275    Ok(HunkRange::new(start, len))
276}
277
278fn hunk_lines<'a, T: Text + ?Sized>(parser: &mut Parser<'a, T>) -> Result<Vec<Line<'a, T>>> {
279    let mut lines: Vec<Line<'a, T>> = Vec::new();
280    let mut no_newline_context = false;
281    let mut no_newline_delete = false;
282    let mut no_newline_insert = false;
283
284    while let Some(line) = parser.peek() {
285        let line = if line.starts_with("@") {
286            break;
287        } else if no_newline_context {
288            return Err(ParsePatchError::new("expected end of hunk"));
289        } else if let Some(line) = line.strip_prefix(" ") {
290            Line::Context(line)
291        } else if line.starts_with("\n") {
292            Line::Context(*line)
293        } else if let Some(line) = line.strip_prefix("-") {
294            if no_newline_delete {
295                return Err(ParsePatchError::new("expected no more deleted lines"));
296            }
297            Line::Delete(line)
298        } else if let Some(line) = line.strip_prefix("+") {
299            if no_newline_insert {
300                return Err(ParsePatchError::new("expected no more inserted lines"));
301            }
302            Line::Insert(line)
303        } else if line.starts_with(NO_NEWLINE_AT_EOF) {
304            let last_line = lines.pop().ok_or_else(|| {
305                ParsePatchError::new("unexpected 'No newline at end of file' line")
306            })?;
307            match last_line {
308                Line::Context(line) => {
309                    no_newline_context = true;
310                    Line::Context(strip_newline(line)?)
311                }
312                Line::Delete(line) => {
313                    no_newline_delete = true;
314                    Line::Delete(strip_newline(line)?)
315                }
316                Line::Insert(line) => {
317                    no_newline_insert = true;
318                    Line::Insert(strip_newline(line)?)
319                }
320            }
321        } else {
322            return Err(ParsePatchError::new("unexpected line in hunk body"));
323        };
324
325        lines.push(line);
326        parser.next()?;
327    }
328
329    Ok(lines)
330}
331
332fn strip_newline<T: Text + ?Sized>(s: &T) -> Result<&T> {
333    if let Some(stripped) = s.strip_suffix("\n") {
334        Ok(stripped)
335    } else {
336        Err(ParsePatchError::new("missing newline"))
337    }
338}
339
#[cfg(test)]
mod tests {
    use super::{parse, parse_bytes};

    /// Filename headers: plain names parse, names containing characters that
    /// need escaping are rejected unless quoted and escaped correctly.
    #[test]
    fn test_escaped_filenames() {
        // No escaped characters
        let s = "\
--- original
+++ modified
@@ -1,0 +1,1 @@
+Oathbringer
";
        parse(s).unwrap();
        parse_bytes(s.as_ref()).unwrap();

        // unescaped characters fail parsing
        let s = "\
--- ori\"ginal
+++ modified
@@ -1,0 +1,1 @@
+Oathbringer
";
        parse(s).unwrap_err();
        parse_bytes(s.as_ref()).unwrap_err();

        // quoted with invalid escaped characters
        let s = "\
--- \"ori\\\"g\rinal\"
+++ modified
@@ -1,0 +1,1 @@
+Oathbringer
";
        parse(s).unwrap_err();
        parse_bytes(s.as_ref()).unwrap_err();

        // quoted with escaped characters
        //
        // Raw strings have no `\`-newline continuation, so the fixture starts
        // directly with the header; otherwise a stray `\` line would precede
        // it and the test would only pass because preamble lines are skipped.
        let s = r#"--- "ori\"g\tinal"
+++ "mo\0\t\r\n\\dified"
@@ -1,0 +1,1 @@
+Oathbringer
"#;
        let p = parse(s).unwrap();
        assert_eq!(p.original(), Some("ori\"g\tinal"));
        assert_eq!(p.modified(), Some("mo\0\t\r\n\\dified"));
        let b = parse_bytes(s.as_ref()).unwrap();
        assert_eq!(b.original(), Some(&b"ori\"g\tinal"[..]));
        assert_eq!(b.modified(), Some(&b"mo\0\t\r\n\\dified"[..]));
    }

    /// Either or both filename headers may be missing and they may appear in
    /// either order, but a repeated header is an error.
    #[test]
    fn test_missing_filename_header() {
        // Missing Both '---' and '+++' lines
        let patch = r#"
@@ -1,11 +1,12 @@
 diesel::table! {
     users1 (id) {
-        id -> Nullable<Integer>,
+        id -> Integer,
     }
 }

 diesel::table! {
-    users2 (id) {
-        id -> Nullable<Integer>,
+    users2 (myid) {
+        #[sql_name = "id"]
+        myid -> Integer,
     }
 }
"#;

        parse(patch).unwrap();

        // Missing '---'
        let s = "\
+++ modified
@@ -1,0 +1,1 @@
+Oathbringer
";
        parse(s).unwrap();

        // Missing '+++'
        let s = "\
--- original
@@ -1,0 +1,1 @@
+Oathbringer
";
        parse(s).unwrap();

        // Headers out of order
        let s = "\
+++ modified
--- original
@@ -1,0 +1,1 @@
+Oathbringer
";
        parse(s).unwrap();

        // multiple headers should fail to parse
        let s = "\
--- original
--- modified
@@ -1,0 +1,1 @@
+Oathbringer
";
        parse(s).unwrap_err();
    }

    /// Two hunks where the second starts exactly where the first ends must be
    /// accepted by the ordering/overlap check.
    #[test]
    fn adjacent_hunks_correctly_parse() {
        let s = "\
--- original
+++ modified
@@ -110,7 +110,7 @@
 --

 I am afraid, however, that all I have known - that my story - will be forgotten.
 I am afraid for the world that is to come.
-Afraid that my plans will fail. Afraid of a doom worse than the Deepness.
+Afraid that Alendi will fail. Afraid of a doom brought by the Deepness.

 Alendi was never the Hero of Ages.
@@ -117,7 +117,7 @@
 At best, I have amplified his virtues, creating a Hero where there was none.

-At worst, I fear that all we believe may have been corrupted.
+At worst, I fear that I have corrupted all we believe.

 --
 Alendi must not reach the Well of Ascension. He must not take the power for himself.

";
        parse(s).unwrap();
    }
}