1use std::io::BufRead;
7use std::io::Error;
8use std::io::ErrorKind;
9use std::io::Result as IoResult;
10use std::rc::Rc;
11use std::str::FromStr;
12
13use once_cell::sync::Lazy;
14
15use regex::Regex;
16
/// Regex fragment matching an optional run of spaces and tabs.
const WS_STRING: &str = r"[ \t]*";

/// Regex fragment capturing a file name: one or more characters that are
/// neither a space nor a tab.
const FILE_STRING: &str = r"([^ \t]+)";

/// Regex fragment capturing a single `+` or `-` sign.
const ADDSUB_STRING: &str = r"([+\-])";

/// Regex fragment capturing an unsigned decimal number.
const NUMLINE_STRING: &str = r"([0-9]+)";
21
22static DIFF_DIFF_REGEX: Lazy<Regex> = Lazy::new(|| {
23 Regex::new(r"^[+\-\\ ]").unwrap()
29});
30static DIFF_NODIFF_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[^+\- ]").unwrap());
31static DIFF_SRC_REGEX: Lazy<Regex> =
32 Lazy::new(|| Regex::new(&format!("^---{WS_STRING}{FILE_STRING}")).unwrap());
33static DIFF_DST_REGEX: Lazy<Regex> =
34 Lazy::new(|| Regex::new(&format!(r"^\+\+\+{WS_STRING}{FILE_STRING}")).unwrap());
35static DIFF_HEAD_REGEX: Lazy<Regex> = Lazy::new(|| {
36 Regex::new(&format!(
39 "^@@ {ADDSUB_STRING}{NUMLINE_STRING}(?:,{NUMLINE_STRING})? \
40 {ADDSUB_STRING}{NUMLINE_STRING}(?:,{NUMLINE_STRING})? @@"
41 ))
42 .unwrap()
43});
44
45
/// The sign attached to a line range in a hunk header.
///
/// The type is a plain, field-less enum, so it is cheap to copy and can be
/// compared, hashed, and used as a key in sets and maps.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum Op {
    /// The range was prefixed with `+`.
    Add,
    /// The range was prefixed with `-`.
    Sub,
}

impl FromStr for Op {
    type Err = ();

    /// Convert `"+"` into [`Op::Add`] and `"-"` into [`Op::Sub`].
    ///
    /// # Errors
    /// Any other input, including the empty string, yields `Err(())`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "+" => Ok(Self::Add),
            "-" => Ok(Self::Sub),
            _ => Err(()),
        }
    }
}
66
67
68#[derive(Debug)]
70pub struct File {
71 pub file: Rc<String>,
73 pub op: Op,
75 pub line: usize,
77 pub count: usize,
79}
80
81
/// The states of the line-by-line diff parser.
#[derive(Clone, Debug)]
enum State {
    /// No `---` line has been seen yet (or the previous diff has ended).
    Start,
    /// A `---` line has been seen; a `+++` line is expected next.
    Src {
        /// File name captured from the `---` line.
        src: Rc<String>,
    },
    /// Both `---` and `+++` lines have been seen; a hunk header is expected
    /// next.
    Dst {
        /// File name captured from the `---` line.
        src: Rc<String>,
        /// File name captured from the `+++` line.
        dst: Rc<String>,
    },
    /// At least one hunk header has been parsed for the current file pair.
    Hdr {
        /// File name captured from the `---` line.
        src: Rc<String>,
        /// File name captured from the `+++` line.
        dst: Rc<String>,
    },
}
94
95impl State {
96 fn advance(&mut self, state: State) -> Option<IoResult<()>> {
98 *self = state;
99 Some(Ok(()))
100 }
101
102 fn parse_head(
104 &mut self,
105 diffs: &mut Vec<(File, File)>,
106 line: &str,
107 src: Rc<String>,
108 dst: Rc<String>,
109 ) -> Option<IoResult<()>> {
110 let captures = DIFF_HEAD_REGEX.captures(line)?;
111
112 let mut parse = || -> IoResult<()> {
113 let add_src = captures.get(1).unwrap().as_str();
116 let start_src = captures.get(2).unwrap().as_str();
117 let count_src = captures.get(3).map(|m| m.as_str()).unwrap_or("1");
120 let add_dst = captures.get(4).unwrap().as_str();
121 let start_dst = captures.get(5).unwrap().as_str();
122 let count_dst = captures.get(6).map(|m| m.as_str()).unwrap_or("1");
123
124 let src_file = File {
125 file: src.clone(),
126 op: add_src.parse().unwrap(),
129 line: start_src.parse().map_err(|error| {
130 Error::new(
131 ErrorKind::Other,
132 format!(r#"failed to parse start line number in line: "{line}": {error}"#),
133 )
134 })?,
135 count: count_src.parse().map_err(|error| {
136 Error::new(
137 ErrorKind::Other,
138 format!(r#"failed to parse line count in line: "{line}": {error}"#),
139 )
140 })?,
141 };
142 let dst_file = File {
143 file: dst.clone(),
144 op: add_dst.parse().unwrap(),
147 line: start_dst.parse().map_err(|error| {
148 Error::new(
149 ErrorKind::Other,
150 format!(r#"failed to parse start line number in line: "{line}": {error}"#),
151 )
152 })?,
153 count: count_dst.parse().map_err(|error| {
154 Error::new(
155 ErrorKind::Other,
156 format!(r#"failed to parse line count in line: "{line}": {error}"#),
157 )
158 })?,
159 };
160 diffs.push((src_file, dst_file));
161 Ok(())
162 };
163
164
165 if let Err(error) = parse() {
166 return Some(Err(error))
167 }
168 self.advance(Self::Hdr { src, dst })
169 }
170
171 fn parse_src(&mut self, line: &str) -> Option<IoResult<()>> {
173 let captures = DIFF_SRC_REGEX.captures(line)?;
174 let src = captures.get(1).unwrap();
177
178 self.advance(Self::Src {
179 src: Rc::new(src.as_str().to_owned()),
180 })
181 }
182
183 fn parse_dst(&mut self, line: &str, src: Rc<String>) -> Option<IoResult<()>> {
185 let captures = DIFF_DST_REGEX.captures(line)?;
186 let dst = captures.get(1).unwrap();
189
190 self.advance(Self::Dst {
191 src,
192 dst: Rc::new(dst.as_str().to_owned()),
193 })
194 }
195
196 fn match_no_diff(&mut self, line: &str) -> Option<IoResult<()>> {
198 DIFF_NODIFF_REGEX.is_match(line).then(|| Ok(()))
199 }
200
201 fn match_diff(&mut self, line: &str) -> Option<IoResult<()>> {
203 DIFF_DIFF_REGEX.is_match(line).then(|| Ok(()))
204 }
205
206 fn restart(&mut self, line: &str) -> Option<IoResult<()>> {
209 DIFF_NODIFF_REGEX.is_match(line).then(|| ())?;
210 self.advance(Self::Start)
211 }
212
213 fn parse(&mut self, diffs: &mut Vec<(File, File)>, line: &str) -> IoResult<()> {
214 macro_rules! check {
216 ($result:expr) => {
217 match $result {
218 None => (),
220 Some(result) => return result,
224 }
225 };
226 }
227
228 match self.clone() {
230 State::Start => {
231 check!(self.parse_src(line));
232 check!(self.match_no_diff(line));
233 },
234 State::Src { src } => {
235 check!(self.parse_dst(line, src));
236 },
237 State::Dst { src, dst } => {
238 check!(self.parse_head(diffs, line, src, dst));
239 },
240 State::Hdr { src, dst } => {
241 check!(self.match_diff(line));
242 check!(self.parse_head(diffs, line, src, dst));
243 check!(self.restart(line));
244 },
245 };
246
247 Err(Error::new(
248 ErrorKind::Other,
249 format!(r#"encountered unexpected line: "{line}" (state: {self:?})"#),
250 ))
251 }
252}
253
254
255pub struct Parser {
257 state: State,
258 diffs: Vec<(File, File)>,
259}
260
261impl Parser {
262 #[inline]
264 pub fn new() -> Self {
265 Self {
266 state: State::Start,
267 diffs: Vec::new(),
268 }
269 }
270
271 pub fn parse<L>(&mut self, mut lines: L) -> IoResult<()>
273 where
274 L: BufRead,
275 {
276 let mut line = String::new();
277
278 loop {
279 line.clear();
280
281 let count = lines.read_line(&mut line)?;
282 if count == 0 {
283 break Ok(())
285 }
286
287 let line = if let Some(line) = line.strip_suffix('\n') {
289 line
290 } else {
291 &line
292 };
293 if !line.is_empty() {
297 let () = self.state.parse(&mut self.diffs, line)?;
298 }
299 }
300 }
301
302 pub fn diffs(&self) -> &[(File, File)] {
304 &self.diffs
305 }
306}
307
308
#[cfg(test)]
mod tests {
    use super::*;


    /// Run a fresh parser over `diff`, panicking if parsing fails.
    #[track_caller]
    fn parse_diff(diff: &str) -> Parser {
        let mut parser = Parser::new();
        let () = parser.parse(diff.as_bytes()).unwrap();
        parser
    }

    /// Check every field of `file` against the expected values.
    #[track_caller]
    fn assert_file(file: &File, name: &str, op: Op, line: usize, count: usize) {
        assert_eq!(file.file.as_str(), name);
        assert_eq!(file.op, op);
        assert_eq!(file.line, line);
        assert_eq!(file.count, count);
    }

    /// Check that a simple diff with a single hunk is parsed correctly.
    #[test]
    fn parse_simple_diff() {
        let diff = r#"
--- main.c
+++ main.c
@@ -6,6 +6,6 @@ int main(int argc, char const* argv[])
 fprintf(stderr, "Too many arguments.\n");
 return -1;
 }
- printf("Hello world!");
+ printf("Hello world!\n");
 return 0;
 }"#;

        let parser = parse_diff(diff);
        let diffs = parser.diffs();
        assert_eq!(diffs.len(), 1);

        let (src, dst) = &diffs[0];
        assert_file(src, "main.c", Op::Sub, 6, 6);
        assert_file(dst, "main.c", Op::Add, 6, 6);
    }

    /// Check that a `\ No newline at end of file` marker following a removed
    /// line is accepted as part of the hunk.
    #[test]
    fn parse_diff_adding_newline_at_end_of_file() {
        let diff = r#"
--- main.c
+++ main.c
@@ -8,4 +8,4 @@ int main(int argc, char const* argv[])
 }
 printf("Hello world!");
 return 0;
-}
\ No newline at end of file
+}"#;

        let parser = parse_diff(diff);
        let diffs = parser.diffs();
        assert_eq!(diffs.len(), 1);

        let (src, dst) = &diffs[0];
        assert_file(src, "main.c", Op::Sub, 8, 4);
        assert_file(dst, "main.c", Op::Add, 8, 4);
    }

    /// Check that a `\ No newline at end of file` marker following an added
    /// line is accepted as part of the hunk.
    #[test]
    fn parse_diff_removing_newline_at_end_of_file() {
        let diff = r#"
--- main.c
+++ main.c
@@ -8,4 +8,4 @@ int main(int argc, char const* argv[])
 }
 printf("Hello world!");
 return 0;
-}
+}
\ No newline at end of file"#;

        let parser = parse_diff(diff);
        let diffs = parser.diffs();
        assert_eq!(diffs.len(), 1);

        let (src, dst) = &diffs[0];
        assert_file(src, "main.c", Op::Sub, 8, 4);
        assert_file(dst, "main.c", Op::Add, 8, 4);
    }

    /// Check that a hunk header omitting the destination line count is
    /// parsed with a count of one.
    #[test]
    fn parse_diff_with_added_file_with_single_line() {
        let diff = r#"
--- /dev/null
+++ main.c
@@ -0,0 +1 @@
+main.c"#;

        let parser = parse_diff(diff);
        let diffs = parser.diffs();
        assert_eq!(diffs.len(), 1);

        let (src, dst) = &diffs[0];
        assert_file(src, "/dev/null", Op::Sub, 0, 0);
        assert_file(dst, "main.c", Op::Add, 1, 1);
    }

    /// Check that a hunk header omitting the source line count is parsed
    /// with a count of one.
    #[test]
    fn parse_diff_with_removed_file_with_single_line() {
        let diff = r#"
--- main.c
+++ /dev/null
@@ -1 +0,0 @@
-main.c"#;

        let parser = parse_diff(diff);
        let diffs = parser.diffs();
        assert_eq!(diffs.len(), 1);

        let (src, dst) = &diffs[0];
        assert_file(src, "main.c", Op::Sub, 1, 1);
        assert_file(dst, "/dev/null", Op::Add, 0, 0);
    }

    /// Check that a completely empty line inside a hunk is tolerated.
    #[test]
    fn parse_diff_with_empty_line() {
        let diff = r#"
--- main.c
+++ main.c
@@ -1,6 +1,6 @@
 #include <stdio.h>

-int main(int argc, char const* argv[])
+int main(int argc, char* argv[])
 {
 if (argc > 1) {
 fprintf(stderr, "Too many arguments.\n");"#;

        let parser = parse_diff(diff);
        let diffs = parser.diffs();
        assert_eq!(diffs.len(), 1);

        let (src, dst) = &diffs[0];
        assert_file(src, "main.c", Op::Sub, 1, 6);
        assert_file(dst, "main.c", Op::Add, 1, 6);
    }

    /// Check that a non-diff line after a hunk ends the current diff so that
    /// a following file header is recognized.
    #[test]
    fn parse_diff_with_multiple_files() {
        let diff = r#"
diff --git a/a.c b/a.c
--- a/a.c
+++ b/a.c
@@ -1 +1 @@
-a
+b
diff --git a/b.c b/b.c
--- a/b.c
+++ b/b.c
@@ -2,3 +2,4 @@
 x
+y
 z
 w"#;

        let parser = parse_diff(diff);
        let diffs = parser.diffs();
        assert_eq!(diffs.len(), 2);

        let (src, dst) = &diffs[0];
        assert_file(src, "a/a.c", Op::Sub, 1, 1);
        assert_file(dst, "b/a.c", Op::Add, 1, 1);

        let (src, dst) = &diffs[1];
        assert_file(src, "a/b.c", Op::Sub, 2, 3);
        assert_file(dst, "b/b.c", Op::Add, 2, 4);
    }
}