use std::io::BufRead;
use std::io::Error;
use std::io::ErrorKind;
use std::io::Result as IoResult;
use std::rc::Rc;
use std::str::FromStr;
use once_cell::sync::Lazy;
use regex::Regex;
// Regex fragments that are spliced into the patterns below via `format!`.

/// Zero or more spaces or tabs.
const WS_STRING: &str = r"[ \t]*";
/// Capture group: one or more characters that are neither a space nor a tab.
const FILE_STRING: &str = r"([^ \t]+)";
/// Capture group: a single `+` or `-` character.
const ADDSUB_STRING: &str = r"([+\-])";
/// Capture group: one or more ASCII digits.
const NUMLINE_STRING: &str = r"([0-9]+)";
/// Matches a line whose first character is `+`, `-`, `\` or a space.
static DIFF_DIFF_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"^[+\-\\ ]").unwrap()
});
/// Matches a line whose first character is none of `+`, `-` or a space.
static DIFF_NODIFF_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[^+\- ]").unwrap());
/// Matches a line starting with `---`, followed by optional spaces/tabs and
/// then a run of non-space, non-tab characters, which is captured as group 1.
static DIFF_SRC_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(&format!("^---{WS_STRING}{FILE_STRING}")).unwrap());
/// Matches a line starting with `+++`, followed by optional spaces/tabs and
/// then a run of non-space, non-tab characters, which is captured as group 1.
static DIFF_DST_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(&format!(r"^\+\+\+{WS_STRING}{FILE_STRING}")).unwrap());
/// Matches a line of the form `@@ <sign><num>[,<num>] <sign><num>[,<num>] @@`
/// (anything may follow the closing `@@`).
///
/// Capture groups: 1 = first sign, 2 = first number, 3 = optional first
/// count, 4 = second sign, 5 = second number, 6 = optional second count.
static DIFF_HEAD_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(&format!(
// The trailing `\` continues the string literal on the next line; the
// space before it separates the two halves of the pattern.
"^@@ {ADDSUB_STRING}{NUMLINE_STRING}(?:,{NUMLINE_STRING})? \
{ADDSUB_STRING}{NUMLINE_STRING}(?:,{NUMLINE_STRING})? @@"
))
.unwrap()
});
/// The sign attached to one side of a hunk header.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Op {
    /// Parsed from the string `"+"`.
    Add,
    /// Parsed from the string `"-"`.
    Sub,
}

impl FromStr for Op {
    /// Parsing carries no diagnostic detail; any unrecognized input yields `()`.
    type Err = ();

    /// Converts exactly `"+"` into [`Op::Add`] and exactly `"-"` into
    /// [`Op::Sub`]. Every other string, including the empty string, is
    /// rejected with `Err(())`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if s == "+" {
            return Ok(Self::Add);
        }
        if s == "-" {
            return Ok(Self::Sub);
        }
        Err(())
    }
}
/// One side of a parsed hunk header: the file it refers to plus the sign,
/// start line and line count taken from the header.
#[derive(Debug)]
pub struct File {
/// File name captured from the `---` or `+++` line; the `Rc` is cloned into
/// each hunk of the same file.
pub file: Rc<String>,
/// Sign that preceded the start line in the hunk header.
pub op: Op,
/// Start line number from the hunk header.
pub line: usize,
/// Line count from the hunk header; the parser uses 1 when the header omits it.
pub count: usize,
}
/// State of the line-by-line diff parser.
#[derive(Clone, Debug)]
enum State {
/// Initial state; also re-entered when a non-diff line follows a hunk.
Start,
/// A `---` line was matched; `src` is the captured file name.
Src { src: Rc<String> },
/// A `+++` line was matched after the `---` line; `dst` is its captured file name.
Dst { src: Rc<String>, dst: Rc<String> },
/// A hunk header was parsed for the `src`/`dst` pair.
Hdr { src: Rc<String>, dst: Rc<String> },
}
impl State {
    /// Replaces the current state with `state` and reports the line as
    /// successfully consumed.
    fn advance(&mut self, state: State) -> Option<IoResult<()>> {
        *self = state;
        Some(Ok(()))
    }

    /// Tries to consume `line` as a hunk header.
    ///
    /// Returns `None` when `line` is not a hunk header. Otherwise the two
    /// sides of the header are appended to `diffs` and the state becomes
    /// [`State::Hdr`]. If a number in the header cannot be parsed as `usize`,
    /// `Some(Err(..))` is returned and neither `diffs` nor the state changes.
    fn parse_head(
        &mut self,
        diffs: &mut Vec<(File, File)>,
        line: &str,
        src: Rc<String>,
        dst: Rc<String>,
    ) -> Option<IoResult<()>> {
        let groups = DIFF_HEAD_REGEX.captures(line)?;

        // Number parsers that attach the offending line to the error.
        let start_of = |digits: &str| -> IoResult<usize> {
            digits.parse::<usize>().map_err(|error| {
                Error::new(
                    ErrorKind::Other,
                    format!(r#"failed to parse start line number in line: "{line}": {error}"#),
                )
            })
        };
        let count_of = |digits: &str| -> IoResult<usize> {
            digits.parse::<usize>().map_err(|error| {
                Error::new(
                    ErrorKind::Other,
                    format!(r#"failed to parse line count in line: "{line}": {error}"#),
                )
            })
        };

        // Builds one side of the hunk from three consecutive capture groups
        // (sign, start line, optional count) beginning at index `first`.
        let describe = |file: &Rc<String>, first: usize| -> IoResult<File> {
            let sign = groups.get(first).unwrap().as_str();
            let start = groups.get(first + 1).unwrap().as_str();
            // A header without an explicit count is treated as a count of one.
            let count = groups.get(first + 2).map_or("1", |found| found.as_str());
            Ok(File {
                file: Rc::clone(file),
                // The regex only admits `+` or `-` here, so this cannot fail.
                op: sign.parse().unwrap(),
                line: start_of(start)?,
                count: count_of(count)?,
            })
        };

        let from = match describe(&src, 1) {
            Ok(file) => file,
            Err(error) => return Some(Err(error)),
        };
        let to = match describe(&dst, 4) {
            Ok(file) => file,
            Err(error) => return Some(Err(error)),
        };

        diffs.push((from, to));
        self.advance(Self::Hdr { src, dst })
    }

    /// Tries to consume `line` as a `---` line; on a match the state becomes
    /// [`State::Src`] holding the captured file name.
    fn parse_src(&mut self, line: &str) -> Option<IoResult<()>> {
        let name = DIFF_SRC_REGEX.captures(line)?.get(1).unwrap().as_str();
        let src = Rc::new(String::from(name));
        self.advance(Self::Src { src })
    }

    /// Tries to consume `line` as a `+++` line; on a match the state becomes
    /// [`State::Dst`] holding `src` and the captured file name.
    fn parse_dst(&mut self, line: &str, src: Rc<String>) -> Option<IoResult<()>> {
        let name = DIFF_DST_REGEX.captures(line)?.get(1).unwrap().as_str();
        let dst = Rc::new(String::from(name));
        self.advance(Self::Dst { src, dst })
    }

    /// Accepts `line` without changing state if it matches the non-diff
    /// pattern; returns `None` otherwise.
    fn match_no_diff(&mut self, line: &str) -> Option<IoResult<()>> {
        if DIFF_NODIFF_REGEX.is_match(line) {
            Some(Ok(()))
        } else {
            None
        }
    }

    /// Accepts `line` without changing state if it matches the diff-content
    /// pattern; returns `None` otherwise.
    fn match_diff(&mut self, line: &str) -> Option<IoResult<()>> {
        if DIFF_DIFF_REGEX.is_match(line) {
            Some(Ok(()))
        } else {
            None
        }
    }

    /// Resets the state to [`State::Start`] if `line` matches the non-diff
    /// pattern; returns `None` otherwise.
    fn restart(&mut self, line: &str) -> Option<IoResult<()>> {
        if !DIFF_NODIFF_REGEX.is_match(line) {
            return None;
        }
        self.advance(Self::Start)
    }

    /// Feeds one line into the state machine.
    ///
    /// The handlers applicable to the current state are tried in order and
    /// the first one that recognizes the line decides the result. If none
    /// does, an error naming the line and the current state is returned.
    fn parse(&mut self, diffs: &mut Vec<(File, File)>, line: &str) -> IoResult<()> {
        // The state is cloned so its payload can be moved into the handlers
        // while `self` remains free to be overwritten by them.
        let outcome = match self.clone() {
            State::Start => self
                .parse_src(line)
                .or_else(|| self.match_no_diff(line)),
            State::Src { src } => self.parse_dst(line, src),
            State::Dst { src, dst } => self.parse_head(diffs, line, src, dst),
            State::Hdr { src, dst } => self
                .match_diff(line)
                .or_else(|| self.parse_head(diffs, line, src, dst))
                .or_else(|| self.restart(line)),
        };

        match outcome {
            Some(result) => result,
            None => Err(Error::new(
                ErrorKind::Other,
                format!(r#"encountered unexpected line: "{line}" (state: {self:?})"#),
            )),
        }
    }
}
/// Line-oriented diff parser that records the hunk headers it encounters.
///
/// The parser keeps its state between calls to [`Parser::parse`], so hunks
/// from successive calls accumulate in [`Parser::diffs`].
pub struct Parser {
    /// Current state of the line state machine.
    state: State,
    /// One `(source, destination)` pair per hunk header parsed so far.
    diffs: Vec<(File, File)>,
}

impl Parser {
    /// Creates a parser in its initial state with no recorded hunks.
    #[inline]
    pub fn new() -> Self {
        Self {
            state: State::Start,
            diffs: Vec::new(),
        }
    }

    /// Reads `lines` to the end, feeding every non-empty line into the
    /// state machine.
    ///
    /// A trailing `\n` or `\r\n` is removed from each line before it is
    /// examined. Stripping the `\r` keeps CRLF input from leaking a carriage
    /// return into captured file names and lets blank CRLF lines be skipped
    /// like blank LF lines.
    ///
    /// # Errors
    ///
    /// Returns any error reported by the underlying reader, and an error
    /// when a line is not acceptable in the current state or a hunk header
    /// contains a number that does not fit into `usize`.
    pub fn parse<L>(&mut self, mut lines: L) -> IoResult<()>
    where
        L: BufRead,
    {
        // A single buffer is reused for every line to avoid reallocating.
        let mut buffer = String::new();
        loop {
            buffer.clear();
            // `read_line` returns 0 only at end of input.
            if lines.read_line(&mut buffer)? == 0 {
                return Ok(());
            }

            let text = buffer.strip_suffix('\n').unwrap_or(buffer.as_str());
            let text = text.strip_suffix('\r').unwrap_or(text);

            if !text.is_empty() {
                self.state.parse(&mut self.diffs, text)?;
            }
        }
    }

    /// Returns the `(source, destination)` pairs recorded so far, in the
    /// order their hunk headers were parsed.
    pub fn diffs(&self) -> &[(File, File)] {
        &self.diffs
    }
}

impl Default for Parser {
    /// Equivalent to [`Parser::new`].
    fn default() -> Self {
        Self::new()
    }
}
// Unit tests: each one feeds a diff given as a raw string literal to a fresh
// `Parser` and checks the single recorded `(source, destination)` pair.
#[cfg(test)]
mod tests {
use super::*;
use std::ops::Deref as _;
/// A diff with one hunk whose header carries explicit counts on both sides.
#[test]
fn parse_simple_diff() {
let diff = r#"
--- main.c
+++ main.c
@@ -6,6 +6,6 @@ int main(int argc, char const* argv[])
fprintf(stderr, "Too many arguments.\n");
return -1;
}
- printf("Hello world!");
+ printf("Hello world!\n");
return 0;
}"#;
let mut parser = Parser::new();
let () = parser.parse(diff.as_bytes()).unwrap();
let diffs = parser.diffs();
assert_eq!(diffs.len(), 1);
let (src, dst) = &diffs[0];
assert_eq!(src.file.deref(), "main.c");
assert_eq!(src.op, Op::Sub);
assert_eq!(src.line, 6);
assert_eq!(src.count, 6);
assert_eq!(dst.file.deref(), "main.c");
assert_eq!(dst.op, Op::Add);
assert_eq!(dst.line, 6);
assert_eq!(dst.count, 6);
}
/// A hunk containing a backslash-prefixed "No newline at end of file" line
/// between the removed and the added line.
#[test]
fn parse_diff_adding_newline_at_end_of_file() {
let diff = r#"
--- main.c
+++ main.c
@@ -8,4 +8,4 @@ int main(int argc, char const* argv[])
}
printf("Hello world!");
return 0;
-}
\\ No newline at end of file
+}"#;
let mut parser = Parser::new();
let () = parser.parse(diff.as_bytes()).unwrap();
let diffs = parser.diffs();
assert_eq!(diffs.len(), 1);
let (src, dst) = &diffs[0];
assert_eq!(src.file.deref(), "main.c");
assert_eq!(src.op, Op::Sub);
assert_eq!(src.line, 8);
assert_eq!(src.count, 4);
assert_eq!(dst.file.deref(), "main.c");
assert_eq!(dst.op, Op::Add);
assert_eq!(dst.line, 8);
assert_eq!(dst.count, 4);
}
/// A hunk whose last line is a backslash-prefixed "No newline at end of
/// file" line following the added line.
#[test]
fn parse_diff_removing_newline_at_end_of_file() {
let diff = r#"
--- main.c
+++ main.c
@@ -8,4 +8,4 @@ int main(int argc, char const* argv[])
}
printf("Hello world!");
return 0;
-}
+}
\\ No newline at end of file"#;
let mut parser = Parser::new();
let () = parser.parse(diff.as_bytes()).unwrap();
let diffs = parser.diffs();
assert_eq!(diffs.len(), 1);
let (src, dst) = &diffs[0];
assert_eq!(src.file.deref(), "main.c");
assert_eq!(src.op, Op::Sub);
assert_eq!(src.line, 8);
assert_eq!(src.count, 4);
assert_eq!(dst.file.deref(), "main.c");
assert_eq!(dst.op, Op::Add);
assert_eq!(dst.line, 8);
assert_eq!(dst.count, 4);
}
/// The destination side of the header omits its count, which is expected
/// to be reported as 1; the source file is `/dev/null`.
#[test]
fn parse_diff_with_added_file_with_single_line() {
let diff = r#"
--- /dev/null
+++ main.c
@@ -0,0 +1 @@
+main.c"#;
let mut parser = Parser::new();
let () = parser.parse(diff.as_bytes()).unwrap();
let diffs = parser.diffs();
assert_eq!(diffs.len(), 1);
let (src, dst) = &diffs[0];
assert_eq!(src.file.deref(), "/dev/null");
assert_eq!(src.op, Op::Sub);
assert_eq!(src.line, 0);
assert_eq!(src.count, 0);
assert_eq!(dst.file.deref(), "main.c");
assert_eq!(dst.op, Op::Add);
assert_eq!(dst.line, 1);
assert_eq!(dst.count, 1);
}
/// The source side of the header omits its count, which is expected to be
/// reported as 1; the destination file is `/dev/null`.
#[test]
fn parse_diff_with_removed_file_with_single_line() {
let diff = r#"
--- main.c
+++ /dev/null
@@ -1 +0,0 @@
-main.c"#;
let mut parser = Parser::new();
let () = parser.parse(diff.as_bytes()).unwrap();
let diffs = parser.diffs();
assert_eq!(diffs.len(), 1);
let (src, dst) = &diffs[0];
assert_eq!(src.file.deref(), "main.c");
assert_eq!(src.op, Op::Sub);
assert_eq!(src.line, 1);
assert_eq!(src.count, 1);
assert_eq!(dst.file.deref(), "/dev/null");
assert_eq!(dst.op, Op::Add);
assert_eq!(dst.line, 0);
assert_eq!(dst.count, 0);
}
/// A hunk starting at line 1 with explicit counts of 6 on both sides.
#[test]
fn parse_diff_with_empty_line() {
let diff = r#"
--- main.c
+++ main.c
@@ -1,6 +1,6 @@
#include <stdio.h>
-int main(int argc, char const* argv[])
+int main(int argc, char* argv[])
{
if (argc > 1) {
fprintf(stderr, "Too many arguments.\n");"#;
let mut parser = Parser::new();
let () = parser.parse(diff.as_bytes()).unwrap();
let diffs = parser.diffs();
assert_eq!(diffs.len(), 1);
let (src, dst) = &diffs[0];
assert_eq!(src.file.deref(), "main.c");
assert_eq!(src.op, Op::Sub);
assert_eq!(src.line, 1);
assert_eq!(src.count, 6);
assert_eq!(dst.file.deref(), "main.c");
assert_eq!(dst.op, Op::Add);
assert_eq!(dst.line, 1);
assert_eq!(dst.count, 6);
}
}