use core::str::Split;
use std::borrow::Cow;
use regex::Regex;
static OLD_FILE_NAME_HEADER: &str = "--- ";
static NEW_FILE_NAME_HEADER: &str = "+++ ";
static HUNK_HEADER_PREFIX: &str = "@@";
pub fn parse_diff(diff: &str) -> Vec<File> {
let mut state = ParseState::new();
let diff: Cow<'_, str> = Cow::Borrowed(diff);
lazy_static! {
static ref NO_NEWLINE: Regex = Regex::new(r#"\\ No newline at end of file"#).unwrap();
static ref REPLACE_LINE: Regex = Regex::new(r#"\r\n?"#).unwrap();
static ref OLD_MODE: Regex = Regex::new(r#"old mode (\d{6})"#).unwrap();
static ref NEW_MODE: Regex = Regex::new(r#"new mode (\d{6})"#).unwrap();
static ref DELETED_FILE_MODE: Regex = Regex::new(r#"deleted file mode (\d{6})"#).unwrap();
static ref NEW_FILE_MODE: Regex = Regex::new(r#"new file mode (\d{6})"#).unwrap();
static ref COPY_FROM: Regex = Regex::new(r#"copy from "?(.+)"?"#).unwrap();
static ref COPY_TO: Regex = Regex::new(r#"copy to "?(.+)"?"#).unwrap();
static ref RENAME_FROM: Regex = Regex::new(r#"rename from "?(.+)"?"#).unwrap();
static ref RENAME_TO: Regex = Regex::new(r#"rename to "?(.+)"?"#).unwrap();
static ref SIMILARITY_INDEX: Regex = Regex::new(r#"similarity index (\d+)%"#).unwrap();
static ref DISSIMILARITY_INDEX: Regex = Regex::new(r#"dissimilarity index (\d+)%"#).unwrap();
static ref INDEX: Regex = Regex::new(r#"index ([0-9a-z]+)\.\.([0-9a-z]+)\s*(\d{6})?"#).unwrap();
static ref BINARY_FILES: Regex = Regex::new(r#"Binary files (.*) and (.*) differ"#).unwrap();
static ref BINARY_DIFF: Regex = Regex::new(r#"GIT binary patch"#).unwrap();
static ref COMBINED_INDEX: Regex = Regex::new(r#"index ([0-9a-z]+),([0-9a-z]+)\.\.([0-9a-z]+)"#).unwrap();
static ref COMBINED_MODE: Regex = Regex::new(r#"mode (\d{6}),(\d{6})\.\.(\d{6})"#).unwrap();
static ref COMBINED_NEW_FILE: Regex = Regex::new(r#"new file mode (\d{6})"#).unwrap();
static ref COMBINED_DELETED_FILE: Regex = Regex::new(r#"deleted file mode (\d{6}),(\d{6})"#).unwrap();
static ref GIT_DIFF_START: Regex = Regex::new(r#"^diff --git "?(.+)"? "?(.+)"?"#).unwrap();
}
let diff = NO_NEWLINE.replace_all(&diff, "");
let diff = REPLACE_LINE.replace_all(&diff, "\n");
let diff_lines: Split<&str> = diff.split("\n");
let diff_lines: Vec<&str> = diff_lines.collect();
for (i, line) in diff_lines.iter().enumerate() {
if *line == "" || line.starts_with("*") {
continue;
}
let prev_line = if i == 0 { None } else { diff_lines.get(i - 1) };
let next_line = diff_lines.get(i + 1);
let after_next_line = diff_lines.get(i + 2);
if line.starts_with("diff") {
state.start_file();
let captures = GIT_DIFF_START.captures(line);
if let Some(captures) = captures {
state.possible_old_name =
get_filename(None, captures.get(1).unwrap().as_str(), None);
state.possible_new_name =
get_filename(None, captures.get(2).unwrap().as_str(), None);
}
state.current_file.as_mut().unwrap().is_git_diff = true;
continue;
}
if
state.current_file.is_none()
|| (
!state.current_file.as_ref().unwrap().is_git_diff
&& (
line.starts_with(OLD_FILE_NAME_HEADER)
&& next_line.unwrap().starts_with(NEW_FILE_NAME_HEADER)
&& after_next_line.unwrap().starts_with(HUNK_HEADER_PREFIX)
)
)
{
state.start_file();
}
if (next_line.is_some()
&& line.starts_with(OLD_FILE_NAME_HEADER)
&& next_line.unwrap().starts_with(NEW_FILE_NAME_HEADER))
|| (prev_line.is_some()
&& line.starts_with(NEW_FILE_NAME_HEADER)
&& prev_line.unwrap().starts_with(OLD_FILE_NAME_HEADER))
{
if state.current_file.is_some()
&& state.current_file.as_ref().unwrap().old_name.is_none()
&& line.starts_with("--- ")
{
state.current_file.as_mut().map(|file| {
file.old_name = get_src_filename(line, None);
file.language = get_extension(
file.old_name.as_ref().unwrap(),
file.language.as_ref().map(|v| v.as_str()),
);
file
});
continue;
}
if state.current_file.is_some()
&& state.current_file.as_ref().unwrap().new_name.is_none()
&& line.starts_with("+++ ")
{
state.current_file.as_mut().map(|file| {
file.new_name = get_dst_filename(line, None);
file.language = get_extension(
file.new_name.as_ref().unwrap(),
file.language.as_ref().map(|v| v.as_str()),
);
file
});
continue;
}
}
if (state.current_file.is_some() && line.starts_with(HUNK_HEADER_PREFIX))
|| (state
.current_file
.as_ref()
.map(|file| file.is_git_diff && file.old_name.is_some() && file.new_name.is_some())
== Some(true)
&& state.current_block.is_none())
{
state.start_block(line);
continue;
}
if state.current_block.is_some()
&& (line.starts_with("+") || line.starts_with("-") || line.starts_with(" "))
{
state.create_line(line);
continue;
}
let does_not_exist_hunk_header = exist_hunk_header(line, &diff_lines, i);
if let Some(captures) = OLD_MODE.captures(line) {
state.current_file.as_mut().map(|file| {
file.old_mode = Some(vec![captures.get(1).unwrap().as_str().to_owned()]);
});
} else if let Some(captures) = NEW_MODE.captures(line) {
state.current_file.as_mut().map(|file| {
file.new_mode = captures.get(1).map(|v| v.as_str().to_owned());
});
} else if let Some(captures) = DELETED_FILE_MODE.captures(line) {
state.current_file.as_mut().map(|file| {
file.deleted_file_mode = captures.get(1).map(|v| v.as_str().to_owned());
file.is_deleted = true;
});
} else if let Some(captures) = NEW_FILE_MODE.captures(line) {
state.current_file.as_mut().map(|file| {
file.new_file_mode = captures.get(1).map(|v| v.as_str().to_owned());
file.is_new = true;
});
} else if let Some(captures) = COPY_FROM.captures(line) {
state.current_file.as_mut().map(|file| {
if does_not_exist_hunk_header {
file.old_name = captures.get(1).map(|v| v.as_str().to_owned());
}
file.is_copy = true;
});
} else if let Some(captures) = COPY_TO.captures(line) {
state.current_file.as_mut().map(|file| {
if does_not_exist_hunk_header {
file.new_name = captures.get(1).map(|v| v.as_str().to_owned());
}
file.is_copy = true;
});
} else if let Some(captures) = RENAME_FROM.captures(line) {
state.current_file.as_mut().map(|file| {
if does_not_exist_hunk_header {
file.old_name = captures.get(1).map(|v| v.as_str().to_owned());
}
file.is_rename = true;
});
} else if let Some(captures) = RENAME_TO.captures(line) {
state.current_file.as_mut().map(|file| {
if does_not_exist_hunk_header {
file.new_name = captures.get(1).map(|v| v.as_str().to_owned());
}
file.is_rename = true;
});
} else if let Some(captures) = BINARY_FILES.captures(line) {
state.current_file.as_mut().map(|file| {
file.is_binary = true;
file.old_name =
get_filename(None, captures.get(1).map(|v| v.as_str()).unwrap(), None);
file.new_name =
get_filename(None, captures.get(2).map(|v| v.as_str()).unwrap(), None);
});
state.start_block("Binary file");
} else if let Some(_captures) = BINARY_DIFF.captures(line) {
state.current_file.as_mut().map(|file| {
file.is_binary = true;
});
state.start_block(line);
} else if let Some(captures) = SIMILARITY_INDEX.captures(line) {
state.current_file.as_mut().map(|file| {
file.unchanged_percentage = captures.get(1).map(|v| v.as_str().parse().unwrap());
});
} else if let Some(captures) = DISSIMILARITY_INDEX.captures(line) {
state.current_file.as_mut().map(|file| {
file.changed_percentage = captures.get(1).map(|v| v.as_str().parse().unwrap());
});
} else if let Some(captures) = INDEX.captures(line) {
state.current_file.as_mut().map(|file| {
file.checksum_before = Some(vec![captures.get(1).unwrap().as_str().to_owned()]);
file.checksum_after = captures.get(2).map(|v| v.as_str().to_owned());
if let Some(mode) = captures.get(3) {
file.mode = Some(mode.as_str().to_owned());
}
});
} else if let Some(captures) = COMBINED_INDEX.captures(line) {
state.current_file.as_mut().map(|file| {
file.checksum_before = Some(vec![
captures.get(1).unwrap().as_str().to_owned(),
captures.get(2).unwrap().as_str().to_owned(),
]);
file.checksum_after = captures.get(3).map(|v| v.as_str().to_owned());
});
} else if let Some(captures) = COMBINED_MODE.captures(line) {
state.current_file.as_mut().map(|file| {
file.old_mode = Some(vec![
captures.get(1).unwrap().as_str().to_owned(),
captures.get(2).unwrap().as_str().to_owned(),
]);
file.new_mode = captures.get(3).map(|v| v.as_str().to_owned());
});
} else if let Some(captures) = COMBINED_NEW_FILE.captures(line) {
state.current_file.as_mut().map(|file| {
file.new_file_mode = captures.get(1).map(|v| v.as_str().to_owned());
file.is_new = true;
});
} else if let Some(captures) = COMBINED_DELETED_FILE.captures(line) {
state.current_file.as_mut().map(|file| {
file.deleted_file_mode = captures.get(1).map(|v| v.as_str().to_owned());
file.is_deleted = true;
});
}
}
state.save_block();
state.save_file();
state.files
}
struct ParseState {
files: Vec<File>,
current_file: Option<File>,
current_block: Option<Block>,
old_line: Option<usize>,
old_line_2: Option<usize>,
new_line: Option<usize>,
possible_old_name: Option<String>,
possible_new_name: Option<String>,
}
impl ParseState {
fn new() -> ParseState {
ParseState {
files: Vec::new(),
current_file: None,
current_block: None,
old_line: None,
old_line_2: None,
new_line: None,
possible_old_name: None,
possible_new_name: None,
}
}
fn start_file(&mut self) {
self.save_block();
self.save_file();
let file = File::new();
self.current_file = Some(file);
}
fn save_file(&mut self) {
if self.current_file.is_some() && {
let file = self.current_file.as_mut().unwrap();
if file.old_name.is_none() {
file.old_name = self.possible_old_name.take();
}
if file.new_name.is_none() {
file.new_name = self.possible_new_name.take();
}
file.new_name.is_some()
} {
self.files.push(self.current_file.take().unwrap());
}
self.possible_old_name = None;
self.possible_new_name = None;
}
fn start_block(&mut self, line: &str) {
self.save_block();
lazy_static! {
static ref RANGE1: Regex =
Regex::new(r#"^@@ -(\d+)(?:,\d+)? \+(\d+)(?:,\d+)? @@.*"#).unwrap();
static ref RANGE2: Regex =
Regex::new(r#"@@@ -(\d+)(?:,\d+)? -(\d+)(?:,\d+)? \+(\d+)(?:,\d+)? @@@.*"#)
.unwrap();
}
let captures = RANGE1.captures(line);
match captures {
Some(captures) => {
self.current_file
.as_mut()
.map(|file| file.is_combined = false);
self.old_line = captures.get(1).map(|v| v.as_str().parse().unwrap());
self.new_line = captures.get(2).map(|v| v.as_str().parse().unwrap());
}
_ => {
let captures = RANGE2.captures(line);
match captures {
Some(captures) => {
self.current_file
.as_mut()
.map(|file| file.is_combined = true);
self.old_line = captures.get(1).map(|v| v.as_str().parse().unwrap());
self.old_line_2 = captures.get(2).map(|v| v.as_str().parse().unwrap());
self.new_line = captures.get(3).map(|v| v.as_str().parse().unwrap());
}
_ => {
if line.starts_with(HUNK_HEADER_PREFIX) {
eprintln!("Failed to parse lines, starting in 0!");
}
self.old_line = Some(0);
self.new_line = Some(0);
self.current_file
.as_mut()
.map(|file| file.is_combined = false);
}
}
}
}
let mut block = Block::new();
block.header = Some(line.to_owned());
self.current_block = Some(block);
}
fn save_block(&mut self) {
if let Some(block) = self.current_block.take() {
if let Some(ref mut file) = self.current_file {
file.blocks.push(block);
}
}
}
fn create_line(&mut self, line: &str) {
let mut line = Line::new(line.to_owned());
let add_line_prefixes = self
.current_file
.as_ref()
.map(|v| if !v.is_combined { "+" } else { "++" })
.unwrap();
let delete_line_prefixes = self
.current_file
.as_ref()
.map(|v| if !v.is_combined { "-" } else { "--" })
.unwrap();
if line.content.starts_with(add_line_prefixes) {
self.current_file.as_mut().unwrap().added_lines += 1;
line.line_type = Some(LineType::Inserts);
line.old_number = None;
line.new_number = self.new_line.to_owned();
self.new_line.as_mut().map(|v| *v += 1);
} else if line.content.starts_with(delete_line_prefixes) {
self.current_file.as_mut().unwrap().deleted_lines += 1;
line.line_type = Some(LineType::Deletes);
line.old_number = self.old_line.to_owned();
self.old_line.as_mut().map(|v| *v += 1);
line.new_number = None;
} else {
line.line_type = Some(LineType::Context);
line.old_number = self.old_line.to_owned();
self.old_line.as_mut().map(|v| *v += 1);
line.new_number = self.new_line.to_owned();
self.new_line.as_mut().map(|v| *v += 1);
}
self.current_block.as_mut().unwrap().lines.push(line);
}
}
#[derive(Debug, Serialize)]
pub struct File {
pub old_name: Option<String>,
pub new_name: Option<String>,
pub is_combined: bool,
pub is_git_diff: bool,
pub language: Option<String>,
pub blocks: Vec<Block>,
pub added_lines: usize,
pub deleted_lines: usize,
pub mode: Option<String>,
pub old_mode: Option<Vec<String>>,
pub new_mode: Option<String>,
pub new_file_mode: Option<String>,
pub deleted_file_mode: Option<String>,
pub is_deleted: bool,
pub is_new: bool,
pub is_copy: bool,
pub is_rename: bool,
pub is_binary: bool,
pub unchanged_percentage: Option<usize>,
pub changed_percentage: Option<usize>,
pub checksum_before: Option<Vec<String>>,
pub checksum_after: Option<String>,
}
impl File {
fn new() -> File {
File {
old_name: None,
new_name: None,
is_combined: false,
is_git_diff: false,
language: None,
blocks: Vec::new(),
added_lines: 0,
deleted_lines: 0,
mode: None,
old_mode: None,
new_mode: None,
new_file_mode: None,
deleted_file_mode: None,
is_deleted: false,
is_new: false,
is_copy: false,
is_rename: false,
is_binary: false,
unchanged_percentage: None,
changed_percentage: None,
checksum_before: None,
checksum_after: None,
}
}
}
#[derive(Clone, Debug, Hash, Serialize)]
pub struct Line {
pub content: String,
pub line_type: Option<LineType>,
pub old_number: Option<usize>,
pub new_number: Option<usize>,
}
impl Line {
fn new(line: String) -> Line {
Line {
content: line,
line_type: None,
old_number: None,
new_number: None,
}
}
}
#[derive(Clone, Debug, Hash, PartialEq, Serialize)]
pub enum LineType {
Inserts,
Deletes,
Context,
InsertChanges,
DeleteChanges,
}
#[derive(Debug, Serialize)]
pub struct Block {
pub lines: Vec<Line>,
pub header: Option<String>,
}
impl Block {
fn new() -> Block {
Block {
lines: Vec::new(),
header: None,
}
}
}
fn get_extension(filename: &str, language: Option<&str>) -> Option<String> {
let name_split: Vec<String> = filename.split(".").map(|v| v.to_string()).collect();
if name_split.len() > 1 {
return Some(name_split.get(name_split.len() - 1).unwrap().to_owned());
}
language.map(|v| v.to_owned())
}
fn get_src_filename(line: &str, prefix: Option<&str>) -> Option<String> {
get_filename(Some("---"), line, prefix)
}
fn get_dst_filename(line: &str, prefix: Option<&str>) -> Option<String> {
get_filename(Some("\\+\\+\\+"), line, prefix)
}
fn get_filename(
line_prefix: Option<&str>,
line: &str,
extra_prefix: Option<&str>,
) -> Option<String> {
lazy_static! {
static ref PREFIXES: Vec<&'static str> = vec!["a/", "b/", "i/", "w/", "c/", "o/"];
static ref FILENAME_REG: Regex = Regex::new(r#"^"?(.+?)"?$"#).unwrap();
static ref DATE: Regex =
Regex::new(r#"\s+\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(?:\.\d+)? [-+]\d{4}.*$"#).unwrap();
}
let mut filename = None;
let captures = match line_prefix {
Some(line_prefix) => Regex::new(&format!(r#"^{} "?(.+?)"?$"#, line_prefix))
.unwrap()
.captures(line),
_ => FILENAME_REG.captures(line),
};
if let Some(matches) = captures {
if matches.len() > 0 {
filename = Some(matches.get(1).unwrap().as_str().to_owned());
}
let mut matching_prefixes = PREFIXES
.iter()
.filter(|p| filename.as_ref().unwrap().contains(*p))
.map(|v| v.to_string())
.collect::<Vec<String>>();
if let Some(extra_prefix) = extra_prefix {
if filename.as_ref().unwrap().contains(extra_prefix) {
matching_prefixes.push(extra_prefix.to_owned());
}
}
if let Some(prefix) = matching_prefixes.get(0) {
filename
.as_mut()
.map(|filename| *filename = filename[prefix.len()..].to_string());
}
filename
.as_mut()
.map(|filename| *filename = DATE.replace(filename, "").to_string());
}
filename
}
fn exist_hunk_header(line: &str, lines: &Vec<&str>, index: usize) -> bool {
let mut idx = index;
while idx < lines.len() - 3 {
if line.starts_with("diff") {
return false;
}
if lines.get(idx).unwrap().starts_with(OLD_FILE_NAME_HEADER)
&& lines
.get(idx + 1)
.unwrap()
.starts_with(NEW_FILE_NAME_HEADER)
&& lines.get(idx + 2).unwrap().starts_with(HUNK_HEADER_PREFIX)
{
return true;
}
idx += 1;
}
false
}