1use super::{Hunk, HunkRange, Line, ESCAPED_CHARS_BYTES, NO_NEWLINE_AT_EOF};
4use crate::{
5 patch::Patch,
6 utils::{LineIter, Text},
7};
8use std::{borrow::Cow, fmt};
9
// File-local `Result` alias whose error type defaults to `ParsePatchError`.
type Result<T, E = ParsePatchError> = std::result::Result<T, E>;
11
/// Error returned when a patch cannot be parsed.
///
/// Wraps a single human-readable message, held as a `Cow<'static, str>` so
/// both string literals and owned `String`s can be stored.
#[derive(Debug)]
pub struct ParsePatchError(Cow<'static, str>);

impl ParsePatchError {
    /// Builds an error from anything convertible into a `Cow<'static, str>`
    /// (for example a `&'static str` literal or a `String`).
    fn new<E: Into<Cow<'static, str>>>(e: E) -> Self {
        let message = e.into();
        ParsePatchError(message)
    }
}

impl fmt::Display for ParsePatchError {
    /// Writes the stored message prefixed with `error parsing patch: `.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(message) = self;
        write!(f, "error parsing patch: {}", message)
    }
}

// No underlying source error is exposed; the default trait methods are used.
impl std::error::Error for ParsePatchError {}
32
33struct Parser<'a, T: Text + ?Sized> {
34 lines: std::iter::Peekable<LineIter<'a, T>>,
35}
36
37impl<'a, T: Text + ?Sized> Parser<'a, T> {
38 fn new(input: &'a T) -> Self {
39 Self {
40 lines: LineIter::new(input).peekable(),
41 }
42 }
43
44 fn peek(&mut self) -> Option<&&'a T> {
45 self.lines.peek()
46 }
47
48 fn next(&mut self) -> Result<&'a T> {
49 let line = self
50 .lines
51 .next()
52 .ok_or_else(|| ParsePatchError::new("unexpected EOF"))?;
53 Ok(line)
54 }
55}
56
57pub fn parse(input: &str) -> Result<Patch<'_, str>> {
58 let mut parser = Parser::new(input);
59 let header = patch_header(&mut parser)?;
60 let hunks = hunks(&mut parser)?;
61
62 Ok(Patch::new(
63 header.0.map(convert_cow_to_str),
64 header.1.map(convert_cow_to_str),
65 hunks,
66 ))
67}
68
69pub fn parse_bytes(input: &[u8]) -> Result<Patch<'_, [u8]>> {
70 let mut parser = Parser::new(input);
71 let header = patch_header(&mut parser)?;
72 let hunks = hunks(&mut parser)?;
73
74 Ok(Patch::new(header.0, header.1, hunks))
75}
76
/// Converts a byte `Cow` into a string `Cow`, keeping it borrowed or owned.
///
/// # Panics
///
/// Panics if the bytes are not valid UTF-8. Within this file the function is
/// only applied to filenames parsed out of `&str` input.
fn convert_cow_to_str(cow: Cow<'_, [u8]>) -> Cow<'_, str> {
    match cow {
        Cow::Borrowed(bytes) => Cow::Borrowed(std::str::from_utf8(bytes).unwrap()),
        Cow::Owned(bytes) => Cow::Owned(String::from_utf8(bytes).unwrap()),
    }
}
84
85#[allow(clippy::type_complexity)]
86fn patch_header<'a, T: Text + ToOwned + ?Sized>(
87 parser: &mut Parser<'a, T>,
88) -> Result<(Option<Cow<'a, [u8]>>, Option<Cow<'a, [u8]>>)> {
89 skip_header_preamble(parser)?;
90
91 let mut filename1 = None;
92 let mut filename2 = None;
93
94 while let Some(line) = parser.peek() {
95 if line.starts_with("--- ") {
96 if filename1.is_some() {
97 return Err(ParsePatchError::new("multiple '---' lines"));
98 }
99 filename1 = Some(parse_filename("--- ", parser.next()?)?);
100 } else if line.starts_with("+++ ") {
101 if filename2.is_some() {
102 return Err(ParsePatchError::new("multiple '+++' lines"));
103 }
104 filename2 = Some(parse_filename("+++ ", parser.next()?)?);
105 } else {
106 break;
107 }
108 }
109
110 Ok((filename1, filename2))
111}
112
113fn skip_header_preamble<T: Text + ?Sized>(parser: &mut Parser<'_, T>) -> Result<()> {
116 while let Some(line) = parser.peek() {
117 if line.starts_with("--- ") | line.starts_with("+++ ") | line.starts_with("@@ ") {
118 break;
119 }
120 parser.next()?;
121 }
122
123 Ok(())
124}
125
126fn parse_filename<'a, T: Text + ToOwned + ?Sized>(
127 prefix: &str,
128 line: &'a T,
129) -> Result<Cow<'a, [u8]>> {
130 let line = line
131 .strip_prefix(prefix)
132 .ok_or_else(|| ParsePatchError::new("unable to parse filename"))?;
133
134 let filename = if let Some((filename, _)) = line.split_at_exclusive("\t") {
135 filename
136 } else if let Some((filename, _)) = line.split_at_exclusive("\n") {
137 filename
138 } else {
139 return Err(ParsePatchError::new("filename unterminated"));
140 };
141
142 let filename = if let Some(quoted) = is_quoted(filename) {
143 escaped_filename(quoted)?
144 } else {
145 unescaped_filename(filename)?
146 };
147
148 Ok(filename)
149}
150
151fn is_quoted<T: Text + ?Sized>(s: &T) -> Option<&T> {
152 s.strip_prefix("\"").and_then(|s| s.strip_suffix("\""))
153}
154
155fn unescaped_filename<T: Text + ToOwned + ?Sized>(filename: &T) -> Result<Cow<'_, [u8]>> {
156 let bytes = filename.as_bytes();
157
158 if bytes.iter().any(|b| ESCAPED_CHARS_BYTES.contains(b)) {
159 return Err(ParsePatchError::new("invalid char in unquoted filename"));
160 }
161
162 Ok(bytes.into())
163}
164
165fn escaped_filename<T: Text + ToOwned + ?Sized>(escaped: &T) -> Result<Cow<'_, [u8]>> {
166 let mut filename = Vec::new();
167
168 let mut chars = escaped.as_bytes().iter().copied();
169 while let Some(c) = chars.next() {
170 if c == b'\\' {
171 let ch = match chars
172 .next()
173 .ok_or_else(|| ParsePatchError::new("expected escaped character"))?
174 {
175 b'n' => b'\n',
176 b't' => b'\t',
177 b'0' => b'\0',
178 b'r' => b'\r',
179 b'\"' => b'\"',
180 b'\\' => b'\\',
181 _ => return Err(ParsePatchError::new("invalid escaped character")),
182 };
183 filename.push(ch);
184 } else if ESCAPED_CHARS_BYTES.contains(&c) {
185 return Err(ParsePatchError::new("invalid unescaped character"));
186 } else {
187 filename.push(c);
188 }
189 }
190
191 Ok(filename.into())
192}
193
194fn verify_hunks_in_order<T: ?Sized>(hunks: &[Hunk<'_, T>]) -> bool {
195 for hunk in hunks.windows(2) {
196 if hunk[0].old_range.end() > hunk[1].old_range.start()
197 || hunk[0].new_range.end() > hunk[1].new_range.start()
198 {
199 return false;
200 }
201 }
202 true
203}
204
205fn hunks<'a, T: Text + ?Sized>(parser: &mut Parser<'a, T>) -> Result<Vec<Hunk<'a, T>>> {
206 let mut hunks = Vec::new();
207 while parser.peek().is_some() {
208 hunks.push(hunk(parser)?);
209 }
210
211 if !verify_hunks_in_order(&hunks) {
213 return Err(ParsePatchError::new("Hunks not in order or overlap"));
214 }
215
216 Ok(hunks)
217}
218
219fn hunk<'a, T: Text + ?Sized>(parser: &mut Parser<'a, T>) -> Result<Hunk<'a, T>> {
220 let (range1, range2, function_context) = hunk_header(parser.next()?)?;
221 let lines = hunk_lines(parser)?;
222
223 let (len1, len2) = super::hunk_lines_count(&lines);
225 if len1 != range1.len || len2 != range2.len {
226 return Err(ParsePatchError::new("Hunk header does not match hunk"));
227 }
228
229 Ok(Hunk::new(range1, range2, function_context, lines))
230}
231
232fn hunk_header<T: Text + ?Sized>(input: &T) -> Result<(HunkRange, HunkRange, Option<&T>)> {
233 let input = input
234 .strip_prefix("@@ ")
235 .ok_or_else(|| ParsePatchError::new("unable to parse hunk header"))?;
236
237 let (ranges, function_context) = input
238 .split_at_exclusive(" @@")
239 .ok_or_else(|| ParsePatchError::new("hunk header unterminated"))?;
240 let function_context = function_context.strip_prefix(" ");
241
242 let (range1, range2) = ranges
243 .split_at_exclusive(" ")
244 .ok_or_else(|| ParsePatchError::new("unable to parse hunk header"))?;
245 let range1 = range(
246 range1
247 .strip_prefix("-")
248 .ok_or_else(|| ParsePatchError::new("unable to parse hunk header"))?,
249 )?;
250 let range2 = range(
251 range2
252 .strip_prefix("+")
253 .ok_or_else(|| ParsePatchError::new("unable to parse hunk header"))?,
254 )?;
255 Ok((range1, range2, function_context))
256}
257
258fn range<T: Text + ?Sized>(s: &T) -> Result<HunkRange> {
259 let (start, len) = if let Some((start, len)) = s.split_at_exclusive(",") {
260 (
261 start
262 .parse()
263 .ok_or_else(|| ParsePatchError::new("can't parse range"))?,
264 len.parse()
265 .ok_or_else(|| ParsePatchError::new("can't parse range"))?,
266 )
267 } else {
268 (
269 s.parse()
270 .ok_or_else(|| ParsePatchError::new("can't parse range"))?,
271 1,
272 )
273 };
274
275 Ok(HunkRange::new(start, len))
276}
277
278fn hunk_lines<'a, T: Text + ?Sized>(parser: &mut Parser<'a, T>) -> Result<Vec<Line<'a, T>>> {
279 let mut lines: Vec<Line<'a, T>> = Vec::new();
280 let mut no_newline_context = false;
281 let mut no_newline_delete = false;
282 let mut no_newline_insert = false;
283
284 while let Some(line) = parser.peek() {
285 let line = if line.starts_with("@") {
286 break;
287 } else if no_newline_context {
288 return Err(ParsePatchError::new("expected end of hunk"));
289 } else if let Some(line) = line.strip_prefix(" ") {
290 Line::Context(line)
291 } else if line.starts_with("\n") {
292 Line::Context(*line)
293 } else if let Some(line) = line.strip_prefix("-") {
294 if no_newline_delete {
295 return Err(ParsePatchError::new("expected no more deleted lines"));
296 }
297 Line::Delete(line)
298 } else if let Some(line) = line.strip_prefix("+") {
299 if no_newline_insert {
300 return Err(ParsePatchError::new("expected no more inserted lines"));
301 }
302 Line::Insert(line)
303 } else if line.starts_with(NO_NEWLINE_AT_EOF) {
304 let last_line = lines.pop().ok_or_else(|| {
305 ParsePatchError::new("unexpected 'No newline at end of file' line")
306 })?;
307 match last_line {
308 Line::Context(line) => {
309 no_newline_context = true;
310 Line::Context(strip_newline(line)?)
311 }
312 Line::Delete(line) => {
313 no_newline_delete = true;
314 Line::Delete(strip_newline(line)?)
315 }
316 Line::Insert(line) => {
317 no_newline_insert = true;
318 Line::Insert(strip_newline(line)?)
319 }
320 }
321 } else {
322 return Err(ParsePatchError::new("unexpected line in hunk body"));
323 };
324
325 lines.push(line);
326 parser.next()?;
327 }
328
329 Ok(lines)
330}
331
332fn strip_newline<T: Text + ?Sized>(s: &T) -> Result<&T> {
333 if let Some(stripped) = s.strip_suffix("\n") {
334 Ok(stripped)
335 } else {
336 Err(ParsePatchError::new("missing newline"))
337 }
338}
339
340#[cfg(test)]
341mod tests {
342 use super::{parse, parse_bytes};
343
344 #[test]
345 fn test_escaped_filenames() {
346 let s = "\
348--- original
349+++ modified
350@@ -1,0 +1,1 @@
351+Oathbringer
352";
353 parse(s).unwrap();
354 parse_bytes(s.as_ref()).unwrap();
355
356 let s = "\
358--- ori\"ginal
359+++ modified
360@@ -1,0 +1,1 @@
361+Oathbringer
362";
363 parse(s).unwrap_err();
364 parse_bytes(s.as_ref()).unwrap_err();
365
366 let s = "\
368--- \"ori\\\"g\rinal\"
369+++ modified
370@@ -1,0 +1,1 @@
371+Oathbringer
372";
373 parse(s).unwrap_err();
374 parse_bytes(s.as_ref()).unwrap_err();
375
376 let s = r#"\
378--- "ori\"g\tinal"
379+++ "mo\0\t\r\n\\dified"
380@@ -1,0 +1,1 @@
381+Oathbringer
382"#;
383 let p = parse(s).unwrap();
384 assert_eq!(p.original(), Some("ori\"g\tinal"));
385 assert_eq!(p.modified(), Some("mo\0\t\r\n\\dified"));
386 let b = parse_bytes(s.as_ref()).unwrap();
387 assert_eq!(b.original(), Some(&b"ori\"g\tinal"[..]));
388 assert_eq!(b.modified(), Some(&b"mo\0\t\r\n\\dified"[..]));
389 }
390
391 #[test]
392 fn test_missing_filename_header() {
393 let patch = r#"
395@@ -1,11 +1,12 @@
396 diesel::table! {
397 users1 (id) {
398- id -> Nullable<Integer>,
399+ id -> Integer,
400 }
401 }
402
403 diesel::table! {
404- users2 (id) {
405- id -> Nullable<Integer>,
406+ users2 (myid) {
407+ #[sql_name = "id"]
408+ myid -> Integer,
409 }
410 }
411"#;
412
413 parse(patch).unwrap();
414
415 let s = "\
417+++ modified
418@@ -1,0 +1,1 @@
419+Oathbringer
420";
421 parse(s).unwrap();
422
423 let s = "\
425--- original
426@@ -1,0 +1,1 @@
427+Oathbringer
428";
429 parse(s).unwrap();
430
431 let s = "\
433+++ modified
434--- original
435@@ -1,0 +1,1 @@
436+Oathbringer
437";
438 parse(s).unwrap();
439
440 let s = "\
442--- original
443--- modified
444@@ -1,0 +1,1 @@
445+Oathbringer
446";
447 parse(s).unwrap_err();
448 }
449
450 #[test]
451 fn adjacent_hunks_correctly_parse() {
452 let s = "\
453--- original
454+++ modified
455@@ -110,7 +110,7 @@
456 --
457
458 I am afraid, however, that all I have known - that my story - will be forgotten.
459 I am afraid for the world that is to come.
460-Afraid that my plans will fail. Afraid of a doom worse than the Deepness.
461+Afraid that Alendi will fail. Afraid of a doom brought by the Deepness.
462
463 Alendi was never the Hero of Ages.
464@@ -117,7 +117,7 @@
465 At best, I have amplified his virtues, creating a Hero where there was none.
466
467-At worst, I fear that all we believe may have been corrupted.
468+At worst, I fear that I have corrupted all we believe.
469
470 --
471 Alendi must not reach the Well of Ascension. He must not take the power for himself.
472
473";
474 parse(s).unwrap();
475 }
476}