use super::lex::SyntaxKind;
use super::lossless::{Patch, PositionedParseError};
use rowan::{GreenNodeBuilder, TextSize};
/// Token-driven parser state that feeds a rowan `GreenNodeBuilder`.
pub(crate) struct Parser<'a> {
/// Input tokens as `(kind, text)` pairs, collected up front by `new`.
tokens: Vec<(SyntaxKind, &'a str)>,
/// Index into `tokens` of the next token to be consumed.
cursor: usize,
/// Green-tree builder that receives nodes and tokens as parsing proceeds.
builder: GreenNodeBuilder<'static>,
/// Plain-text error messages, passed to the resulting `Parse` by `parse`.
errors: Vec<String>,
/// `PositionedParseError` values, passed to the resulting `Parse` by `parse`.
positioned_errors: Vec<PositionedParseError>,
/// Running sum of the byte lengths of every token consumed so far.
text_position: TextSize,
}
impl<'a> Parser<'a> {
/// Builds a parser over `tokens`.
///
/// The iterator is drained into a vector immediately so the parser can look
/// ahead by index. The cursor and text position start at zero and both error
/// lists start empty.
pub fn new(tokens: impl Iterator<Item = (SyntaxKind, &'a str)>) -> Self {
    Self {
        tokens: tokens.collect(),
        cursor: 0,
        builder: GreenNodeBuilder::new(),
        errors: Vec::new(),
        positioned_errors: Vec::new(),
        text_position: TextSize::from(0),
    }
}
pub fn parse(mut self) -> super::Parse<Patch> {
self.builder.start_node(SyntaxKind::ROOT.into());
self.parse_patch();
self.builder.finish_node();
let green = self.builder.finish();
super::Parse::new_with_positioned_errors(green, self.errors, self.positioned_errors)
}
/// Top-level dispatch loop: inspects the current position and hands off to the
/// matching sub-parser until every token has been consumed.
///
/// Order of precedence:
/// 1. `***` — a context hunk wrapped in its own `CONTEXT_DIFF_FILE` when the
///    line looks like a hunk range, otherwise a full context diff file.
/// 2. `---` (unless `peek_text(3)` starts with `>` or the line looks like a
///    context new-section header) or `+++` — a unified patch file.
/// 3. A normal-diff change command.
/// 4. An ed command.
/// 5. Anything else — the rest of the line is emitted as-is.
fn parse_patch(&mut self) {
    while !self.at_end() {
        let at_triple_star = self.at(SyntaxKind::STAR) && self.peek_text(0) == Some("***");
        if at_triple_star {
            if self.looks_like_context_hunk_range() {
                self.builder
                    .start_node(SyntaxKind::CONTEXT_DIFF_FILE.into());
                self.parse_context_hunk_without_separator();
                self.builder.finish_node();
            } else {
                self.parse_context_diff_file();
            }
            continue;
        }

        let starts_old_header = self.at(SyntaxKind::MINUS)
            && self.peek_text(0) == Some("---")
            && !matches!(self.peek_text(3), Some(next) if next.starts_with('>'))
            && !self.looks_like_context_new_section();

        if starts_old_header
            || (self.at(SyntaxKind::PLUS) && self.peek_text(0) == Some("+++"))
        {
            self.parse_patch_file();
        } else if self.looks_like_normal_diff() {
            self.parse_normal_hunk();
        } else if self.looks_like_ed_command() {
            self.parse_ed_command();
        } else {
            self.skip_to_next_line();
        }
    }
}
/// Parses one unified-diff file entry into a `PATCH_FILE` node.
///
/// The entry is an optional `---` header, an optional `+++` header, and then
/// every hunk that begins with `@@`.
fn parse_patch_file(&mut self) {
    self.builder.start_node(SyntaxKind::PATCH_FILE.into());

    let has_old_header = self.at(SyntaxKind::MINUS) && self.peek_text(0) == Some("---");
    if has_old_header {
        self.parse_old_file();
    }

    // Evaluated only after the old header has been consumed.
    let has_new_header = self.at(SyntaxKind::PLUS) && self.peek_text(0) == Some("+++");
    if has_new_header {
        self.parse_new_file();
    }

    loop {
        let at_hunk = self.at(SyntaxKind::AT) && self.peek_text(0) == Some("@@");
        if !at_hunk {
            break;
        }
        self.parse_hunk();
    }

    self.builder.finish_node();
}
fn parse_old_file(&mut self) {
self.builder.start_node(SyntaxKind::OLD_FILE.into());
self.advance(); self.advance(); self.advance();
self.skip_whitespace();
let mut path_parts = Vec::new();
let mut collecting_path = true;
while !self.at(SyntaxKind::NEWLINE) && !self.at_end() && collecting_path {
match self.current_kind() {
Some(SyntaxKind::TEXT)
| Some(SyntaxKind::SLASH)
| Some(SyntaxKind::DOT)
| Some(SyntaxKind::NUMBER)
| Some(SyntaxKind::COLON)
| Some(SyntaxKind::BACKSLASH) => {
if let Some(text) = self.current_text() {
path_parts.push(text.to_string());
}
self.advance_without_emit();
}
Some(SyntaxKind::WHITESPACE) if !path_parts.is_empty() => {
collecting_path = false;
}
_ => {
collecting_path = false;
}
}
}
if !path_parts.is_empty() {
let path = path_parts.join("");
self.builder.token(SyntaxKind::PATH.into(), &path);
}
while !self.at(SyntaxKind::NEWLINE) && !self.at_end() {
self.advance();
}
if self.at(SyntaxKind::NEWLINE) {
self.advance();
}
self.builder.finish_node();
}
fn parse_new_file(&mut self) {
self.builder.start_node(SyntaxKind::NEW_FILE.into());
self.advance(); self.advance(); self.advance();
self.skip_whitespace();
let mut path_parts = Vec::new();
let mut collecting_path = true;
while !self.at(SyntaxKind::NEWLINE) && !self.at_end() && collecting_path {
match self.current_kind() {
Some(SyntaxKind::TEXT)
| Some(SyntaxKind::SLASH)
| Some(SyntaxKind::DOT)
| Some(SyntaxKind::NUMBER)
| Some(SyntaxKind::COLON)
| Some(SyntaxKind::BACKSLASH) => {
if let Some(text) = self.current_text() {
path_parts.push(text.to_string());
}
self.advance_without_emit();
}
Some(SyntaxKind::WHITESPACE) if !path_parts.is_empty() => {
collecting_path = false;
}
_ => {
collecting_path = false;
}
}
}
if !path_parts.is_empty() {
let path = path_parts.join("");
self.builder.token(SyntaxKind::PATH.into(), &path);
}
while !self.at(SyntaxKind::NEWLINE) && !self.at_end() {
self.advance();
}
if self.at(SyntaxKind::NEWLINE) {
self.advance();
}
self.builder.finish_node();
}
/// Parses one unified-diff hunk, or skips it if its header is unusable.
///
/// The two marker tokens and any whitespace after them are looked past without
/// consuming anything. The header counts as valid only if a `-` or `+` token
/// immediately followed by a number comes next. A valid hunk becomes a `HUNK`
/// node holding the header and every line up to the next hunk or file marker.
/// Otherwise lines are emitted unstructured until such a marker or the end of
/// input is reached.
fn parse_hunk(&mut self) {
    let mut lookahead = 2;
    while self.peek_kind(lookahead) == Some(SyntaxKind::WHITESPACE) {
        lookahead += 1;
    }
    let sign_present = matches!(
        self.peek_kind(lookahead),
        Some(SyntaxKind::MINUS) | Some(SyntaxKind::PLUS)
    );
    let has_valid_range =
        sign_present && self.peek_kind(lookahead + 1) == Some(SyntaxKind::NUMBER);

    if has_valid_range {
        self.builder.start_node(SyntaxKind::HUNK.into());
        self.parse_hunk_header();
        while !self.at_end() && !self.is_hunk_end() {
            self.parse_hunk_line();
        }
        self.builder.finish_node();
    } else {
        // Recovery: always drop the bad header line, then keep dropping lines
        // until something that can start a hunk or file header shows up.
        loop {
            self.skip_to_next_line();
            if self.at_end() || self.is_hunk_end() {
                break;
            }
        }
    }
}
/// Parses a hunk header line into a `HUNK_HEADER` node.
///
/// Layout handled: two opening marker tokens, an optional `-` range, an
/// optional `+` range, an optional closing `@@`, then arbitrary trailing
/// tokens up to and including the newline.
///
/// The caller (`parse_hunk`) only invokes this after confirming the line
/// starts with `@@`, so the two opening tokens are consumed unconditionally.
fn parse_hunk_header(&mut self) {
    self.builder.start_node(SyntaxKind::HUNK_HEADER.into());

    // Opening marker.
    self.advance();
    self.advance();
    self.skip_whitespace();

    if self.at(SyntaxKind::MINUS) {
        self.parse_hunk_range();
    }
    self.skip_whitespace();
    if self.at(SyntaxKind::PLUS) {
        self.parse_hunk_range();
    }
    self.skip_whitespace();

    // Closing marker: consume at most two tokens, and only if each really is
    // an `@`. A truncated marker (a single `@` right before the newline) must
    // not swallow the newline, or the next line would be absorbed into this
    // header.
    for _ in 0..2 {
        if self.at(SyntaxKind::AT) {
            self.advance();
        }
    }

    // Whatever trails the closing marker stays inside the header node.
    self.skip_to_eol();
    self.builder.finish_node();
}
/// Parses a line range into a `HUNK_RANGE` node.
///
/// Accepted shape: an optional `-`/`+` sign, a start number, and optionally a
/// comma followed by a second number. Unified headers call this positioned on
/// the sign; context section headers call it positioned on the first number.
///
/// Only tokens that belong to a range are consumed. If the current token is
/// something else (for example a newline on a header with no range), the node
/// is left empty, so the caller's end-of-line handling still sees the newline
/// and the following line is not pulled into the header.
fn parse_hunk_range(&mut self) {
    self.builder.start_node(SyntaxKind::HUNK_RANGE.into());

    if self.at(SyntaxKind::MINUS) || self.at(SyntaxKind::PLUS) {
        self.advance();
    }
    if self.at(SyntaxKind::NUMBER) {
        self.advance();
    }
    if self.at(SyntaxKind::COMMA) {
        self.advance();
        if self.at(SyntaxKind::NUMBER) {
            self.advance();
        }
    }

    self.builder.finish_node();
}
/// Wraps one body line of a unified hunk in a node chosen by its first token.
///
/// A leading space gives `CONTEXT_LINE`, `+` gives `ADD_LINE` and `-` gives
/// `DELETE_LINE`. Any other first token also gives `CONTEXT_LINE`. Every
/// token of the line, including the newline if present, ends up inside the
/// node.
fn parse_hunk_line(&mut self) {
    let node_kind = match self.current_kind() {
        Some(SyntaxKind::PLUS) => SyntaxKind::ADD_LINE,
        Some(SyntaxKind::MINUS) => SyntaxKind::DELETE_LINE,
        // Covers both an explicit leading space and any unrecognised start.
        _ => SyntaxKind::CONTEXT_LINE,
    };

    self.builder.start_node(node_kind.into());
    // The marker token, if any, is simply the first token emitted here.
    self.skip_to_eol();
    self.builder.finish_node();
}
/// Returns `true` when the current position starts a `@@`, `---` or `+++`
/// marker, which ends the body of the hunk being parsed.
fn is_hunk_end(&self) -> bool {
    let expected = match self.current_kind() {
        Some(SyntaxKind::AT) => "@@",
        Some(SyntaxKind::MINUS) => "---",
        Some(SyntaxKind::PLUS) => "+++",
        _ => return false,
    };
    self.peek_text(0) == Some(expected)
}
/// Kind of the token under the cursor, or `None` once the input is exhausted.
fn current_kind(&self) -> Option<SyntaxKind> {
    let &(kind, _) = self.tokens.get(self.cursor)?;
    Some(kind)
}
/// Text of the token under the cursor, or `None` once the input is exhausted.
fn current_text(&self) -> Option<&str> {
    match self.tokens.get(self.cursor) {
        Some(&(_, text)) => Some(text),
        None => None,
    }
}
/// Looks at the text starting `offset` tokens past the cursor.
///
/// Up to three tokens from that position are concatenated. If the result
/// begins with one of the markers `---`, `+++`, `@@` or `***`, that marker
/// literal is returned. Otherwise the text of the single token at
/// `cursor + offset` is returned, or `None` if there is no such token.
///
/// The fallback previously indexed `cursor + 2 * offset` because `offset` was
/// added a second time. It now reads the token at the requested position.
/// Behaviour for `offset == 0` is unchanged.
fn peek_text(&self, offset: usize) -> Option<&str> {
    const MARKERS: [&str; 4] = ["---", "+++", "@@", "***"];

    let start = self.cursor + offset;
    let joined: String = self
        .tokens
        .iter()
        .skip(start)
        .take(3)
        .map(|&(_, text)| text)
        .collect();

    // The markers are mutually exclusive prefixes, so the check order is
    // irrelevant.
    if let Some(marker) = MARKERS.iter().copied().find(|m| joined.starts_with(*m)) {
        return Some(marker);
    }

    self.tokens.get(start).map(|&(_, text)| text)
}
/// Returns `true` if there is a current token and it has the given `kind`.
fn at(&self, kind: SyntaxKind) -> bool {
    matches!(self.current_kind(), Some(found) if found == kind)
}
/// Returns `true` once the cursor has moved past the last token.
fn at_end(&self) -> bool {
    self.tokens.len() <= self.cursor
}
/// Emits the current token into the tree under its own kind and steps past it.
///
/// `text_position` grows by the token's byte length. Does nothing at the end
/// of input.
fn advance(&mut self) {
    let (kind, text) = match self.tokens.get(self.cursor) {
        Some(&entry) => entry,
        None => return,
    };
    self.builder.token(kind.into(), text);
    self.text_position += TextSize::from(text.len() as u32);
    self.cursor += 1;
}
/// Steps past the current token without adding it to the tree.
///
/// `text_position` still grows by the token's byte length. The caller is
/// responsible for emitting the skipped text some other way. Does nothing at
/// the end of input.
fn advance_without_emit(&mut self) {
    let consumed_len = match self.tokens.get(self.cursor) {
        Some(&(_, text)) => text.len(),
        None => return,
    };
    self.text_position += TextSize::from(consumed_len as u32);
    self.cursor += 1;
}
/// Emits every consecutive `WHITESPACE` token starting at the cursor.
fn skip_whitespace(&mut self) {
    while self.current_kind() == Some(SyntaxKind::WHITESPACE) {
        self.advance();
    }
}
fn skip_to_next_line(&mut self) {
while !self.at(SyntaxKind::NEWLINE) && !self.at_end() {
self.advance();
}
if self.at(SyntaxKind::NEWLINE) {
self.advance();
}
}
/// Lookahead check for an ed command: a run of number/comma tokens that
/// starts with a number and is immediately followed by an `a`, `c` or `d`
/// letter token. Consumes nothing.
fn looks_like_ed_command(&self) -> bool {
    if self.peek_kind(0) != Some(SyntaxKind::NUMBER) {
        return false;
    }

    // Length of the leading address run, which is also the offset of the
    // first token after it.
    let address_len = (0..)
        .take_while(|&i| {
            matches!(
                self.peek_kind(i),
                Some(SyntaxKind::NUMBER) | Some(SyntaxKind::COMMA)
            )
        })
        .count();

    matches!(
        self.peek_kind(address_len),
        Some(SyntaxKind::LETTER_A) | Some(SyntaxKind::LETTER_C) | Some(SyntaxKind::LETTER_D)
    )
}
/// Lookahead check for a normal-diff change command: a run of number/comma
/// tokens starting with a number, then an `a`, `c` or `d` letter token, then
/// a number. Consumes nothing.
fn looks_like_normal_diff(&self) -> bool {
    if self.peek_kind(0) != Some(SyntaxKind::NUMBER) {
        return false;
    }

    // Offset of the first token after the leading address run.
    let letter_at = (0..)
        .take_while(|&i| {
            matches!(
                self.peek_kind(i),
                Some(SyntaxKind::NUMBER) | Some(SyntaxKind::COMMA)
            )
        })
        .count();

    let has_command_letter = matches!(
        self.peek_kind(letter_at),
        Some(SyntaxKind::LETTER_A) | Some(SyntaxKind::LETTER_C) | Some(SyntaxKind::LETTER_D)
    );

    // The number after the letter is what separates this from an ed command.
    has_command_letter && self.peek_kind(letter_at + 1) == Some(SyntaxKind::NUMBER)
}
/// Kind of the token `offset` positions past the cursor, or `None` if that
/// position is beyond the end of the input.
fn peek_kind(&self, offset: usize) -> Option<SyntaxKind> {
    let &(kind, _) = self.tokens.get(self.cursor + offset)?;
    Some(kind)
}
/// Parses one context-diff file entry into a `CONTEXT_DIFF_FILE` node.
///
/// The entry is an optional `***` old-file header, an optional `---` new-file
/// header, and then every hunk introduced by a separator line of stars.
fn parse_context_diff_file(&mut self) {
    self.builder
        .start_node(SyntaxKind::CONTEXT_DIFF_FILE.into());

    let has_old_header = self.at(SyntaxKind::STAR) && self.peek_text(0) == Some("***");
    if has_old_header {
        self.parse_context_old_file();
    }

    // Evaluated only after the old header has been consumed.
    let has_new_header = self.at(SyntaxKind::MINUS) && self.peek_text(0) == Some("---");
    if has_new_header {
        self.parse_context_new_file();
    }

    loop {
        let at_separator = self.at(SyntaxKind::STAR) && self.looks_like_context_hunk_separator();
        if !at_separator {
            break;
        }
        self.parse_context_hunk();
    }

    self.builder.finish_node();
}
/// Parses a context-diff old-file header line into a `CONTEXT_OLD_FILE` node.
///
/// Emits three marker tokens and any whitespace, then the path via
/// `parse_file_path`, then the rest of the line.
fn parse_context_old_file(&mut self) {
    self.builder.start_node(SyntaxKind::CONTEXT_OLD_FILE.into());
    for _ in 0..3 {
        self.advance();
    }
    self.skip_whitespace();
    self.parse_file_path();
    self.skip_to_eol();
    self.builder.finish_node();
}
/// Parses a context-diff new-file header line into a `CONTEXT_NEW_FILE` node.
///
/// Emits three marker tokens and any whitespace, then the path via
/// `parse_file_path`, then the rest of the line.
fn parse_context_new_file(&mut self) {
    self.builder.start_node(SyntaxKind::CONTEXT_NEW_FILE.into());
    for _ in 0..3 {
        self.advance();
    }
    self.skip_whitespace();
    self.parse_file_path();
    self.skip_to_eol();
    self.builder.finish_node();
}
/// Parses a context-diff hunk that begins with a separator line.
///
/// The separator line becomes a `CONTEXT_HUNK_HEADER`. It is followed by an
/// optional `***` old section and an optional `---` new section, all inside
/// one `CONTEXT_HUNK` node.
fn parse_context_hunk(&mut self) {
    self.builder.start_node(SyntaxKind::CONTEXT_HUNK.into());

    // Separator line: the run of stars and anything trailing it, through the
    // newline, all belong to the header node.
    self.builder
        .start_node(SyntaxKind::CONTEXT_HUNK_HEADER.into());
    self.skip_to_eol();
    self.builder.finish_node();

    let old_section_follows = self.at(SyntaxKind::STAR) && self.peek_text(0) == Some("***");
    if old_section_follows {
        self.parse_context_old_section();
    }

    // Evaluated only after the old section has been consumed.
    let new_section_follows = self.at(SyntaxKind::MINUS) && self.peek_text(0) == Some("---");
    if new_section_follows {
        self.parse_context_new_section();
    }

    self.builder.finish_node();
}
/// Parses a context-diff hunk that has no leading separator line.
///
/// The `CONTEXT_HUNK` node holds an optional `***` old section followed by an
/// optional `---` new section.
fn parse_context_hunk_without_separator(&mut self) {
    self.builder.start_node(SyntaxKind::CONTEXT_HUNK.into());

    let old_section_follows = self.at(SyntaxKind::STAR) && self.peek_text(0) == Some("***");
    if old_section_follows {
        self.parse_context_old_section();
    }

    // Evaluated only after the old section has been consumed.
    let new_section_follows = self.at(SyntaxKind::MINUS) && self.peek_text(0) == Some("---");
    if new_section_follows {
        self.parse_context_new_section();
    }

    self.builder.finish_node();
}
/// Parses the old-side section of a context hunk into a
/// `CONTEXT_OLD_SECTION` node.
///
/// The header line is three marker tokens, whitespace, a range, and the rest
/// of the line. It is followed by body lines until the next `---`/`***`
/// marker, a hunk separator, or the end of input.
fn parse_context_old_section(&mut self) {
    self.builder
        .start_node(SyntaxKind::CONTEXT_OLD_SECTION.into());

    for _ in 0..3 {
        self.advance();
    }
    self.skip_whitespace();
    self.parse_hunk_range();
    self.skip_to_eol();

    loop {
        if self.at_end() || self.is_context_section_end() {
            break;
        }
        self.parse_context_line();
    }

    self.builder.finish_node();
}
/// Parses the new-side section of a context hunk into a
/// `CONTEXT_NEW_SECTION` node.
///
/// The header line is three marker tokens, whitespace, a range, and the rest
/// of the line. It is followed by body lines until the next `---`/`***`
/// marker, a hunk separator, or the end of input.
fn parse_context_new_section(&mut self) {
    self.builder
        .start_node(SyntaxKind::CONTEXT_NEW_SECTION.into());

    for _ in 0..3 {
        self.advance();
    }
    self.skip_whitespace();
    self.parse_hunk_range();
    self.skip_to_eol();

    loop {
        if self.at_end() || self.is_context_section_end() {
            break;
        }
        self.parse_context_line();
    }

    self.builder.finish_node();
}
/// Wraps one body line of a context-diff section in a node chosen by its
/// first token.
///
/// A leading space gives `CONTEXT_LINE`, `!` gives `CONTEXT_CHANGE_LINE`, `+`
/// gives `ADD_LINE` and `-` gives `DELETE_LINE`. Any other first token also
/// gives `CONTEXT_LINE`. Every token of the line, including the newline if
/// present, ends up inside the node.
fn parse_context_line(&mut self) {
    let node_kind = match self.current_kind() {
        Some(SyntaxKind::EXCLAMATION) => SyntaxKind::CONTEXT_CHANGE_LINE,
        Some(SyntaxKind::PLUS) => SyntaxKind::ADD_LINE,
        Some(SyntaxKind::MINUS) => SyntaxKind::DELETE_LINE,
        // Covers both an explicit leading space and any unrecognised start.
        _ => SyntaxKind::CONTEXT_LINE,
    };

    self.builder.start_node(node_kind.into());
    // The marker token, if any, is simply the first token emitted here.
    self.skip_to_eol();
    self.builder.finish_node();
}
/// Returns `true` when the current position starts something that ends a
/// context section: a `---` marker, a `***` marker, or a hunk separator line.
fn is_context_section_end(&self) -> bool {
    match self.current_kind() {
        Some(SyntaxKind::MINUS) => self.peek_text(0) == Some("---"),
        Some(SyntaxKind::STAR) => {
            self.peek_text(0) == Some("***") || self.looks_like_context_hunk_separator()
        }
        _ => false,
    }
}
/// Parses one ed-style command into an `ED_COMMAND` node.
///
/// The leading number/comma address tokens are emitted first. When the next
/// token is an `a`, `c` or `d` letter, the address, the letter and the rest
/// of the line are wrapped in the matching `ED_ADD_COMMAND`,
/// `ED_CHANGE_COMMAND` or `ED_DELETE_COMMAND` node. Add and change commands
/// additionally take the content lines that follow. With any other token the
/// rest of the line is emitted directly inside `ED_COMMAND`.
fn parse_ed_command(&mut self) {
    self.builder.start_node(SyntaxKind::ED_COMMAND.into());

    // Remember where the address starts so the specific command node can be
    // opened around it once the command letter is known.
    let address_start = self.builder.checkpoint();
    while matches!(
        self.current_kind(),
        Some(SyntaxKind::NUMBER) | Some(SyntaxKind::COMMA)
    ) {
        self.advance();
    }

    let (node_kind, takes_content) = match self.current_kind() {
        Some(SyntaxKind::LETTER_A) => (SyntaxKind::ED_ADD_COMMAND, true),
        Some(SyntaxKind::LETTER_C) => (SyntaxKind::ED_CHANGE_COMMAND, true),
        Some(SyntaxKind::LETTER_D) => (SyntaxKind::ED_DELETE_COMMAND, false),
        _ => {
            self.skip_to_eol();
            self.builder.finish_node();
            return;
        }
    };

    self.builder.start_node_at(address_start, node_kind.into());
    self.advance();
    self.skip_to_eol();
    if takes_content {
        self.parse_ed_content_lines();
    }
    self.builder.finish_node();

    self.builder.finish_node();
}
/// Parses the text lines that follow an ed add/change command.
///
/// Each line becomes an `ED_CONTENT_LINE` node. A `.` token directly followed
/// by a newline ends the block. Those two tokens are emitted outside any
/// content-line node. Without such a terminator, lines are taken until the
/// end of input.
fn parse_ed_content_lines(&mut self) {
    while !self.at_end() {
        let at_terminator =
            self.at(SyntaxKind::DOT) && self.peek_kind(1) == Some(SyntaxKind::NEWLINE);
        if at_terminator {
            self.advance();
            self.advance();
            return;
        }

        self.builder.start_node(SyntaxKind::ED_CONTENT_LINE.into());
        self.skip_to_eol();
        self.builder.finish_node();
    }
}
/// Parses one normal-diff hunk into a `NORMAL_HUNK` node.
///
/// The command line (address, optional `a`/`c`/`d` letter, address, rest of
/// line) becomes a `NORMAL_CHANGE_COMMAND`. It is followed by optional `<`
/// lines, an optional `---` separator and optional `>` lines.
fn parse_normal_hunk(&mut self) {
    self.builder.start_node(SyntaxKind::NORMAL_HUNK.into());

    self.builder
        .start_node(SyntaxKind::NORMAL_CHANGE_COMMAND.into());
    while matches!(
        self.current_kind(),
        Some(SyntaxKind::NUMBER) | Some(SyntaxKind::COMMA)
    ) {
        self.advance();
    }
    let at_command_letter = matches!(
        self.current_kind(),
        Some(SyntaxKind::LETTER_A) | Some(SyntaxKind::LETTER_C) | Some(SyntaxKind::LETTER_D)
    );
    if at_command_letter {
        self.advance();
    }
    while matches!(
        self.current_kind(),
        Some(SyntaxKind::NUMBER) | Some(SyntaxKind::COMMA)
    ) {
        self.advance();
    }
    self.skip_to_eol();
    self.builder.finish_node();

    if self.current_kind() == Some(SyntaxKind::LESS_THAN) {
        self.parse_normal_old_lines();
    }
    let at_separator = self.at(SyntaxKind::MINUS) && self.peek_text(0) == Some("---");
    if at_separator {
        self.parse_normal_separator();
    }
    if self.current_kind() == Some(SyntaxKind::GREATER_THAN) {
        self.parse_normal_new_lines();
    }

    self.builder.finish_node();
}
/// Collects consecutive `<` lines of a normal diff into a `NORMAL_OLD_LINES`
/// node, with each line wrapped in its own `DELETE_LINE` node.
fn parse_normal_old_lines(&mut self) {
    self.builder.start_node(SyntaxKind::NORMAL_OLD_LINES.into());
    while self.current_kind() == Some(SyntaxKind::LESS_THAN) {
        self.builder.start_node(SyntaxKind::DELETE_LINE.into());
        // Emits the `<` marker, the whitespace after it and the rest of the
        // line through the newline.
        self.skip_to_eol();
        self.builder.finish_node();
    }
    self.builder.finish_node();
}
/// Parses the separator line of a normal diff into a `NORMAL_SEPARATOR` node:
/// three marker tokens followed by the rest of the line.
fn parse_normal_separator(&mut self) {
    self.builder.start_node(SyntaxKind::NORMAL_SEPARATOR.into());
    for _ in 0..3 {
        self.advance();
    }
    self.skip_to_eol();
    self.builder.finish_node();
}
/// Collects consecutive `>` lines of a normal diff into a `NORMAL_NEW_LINES`
/// node, with each line wrapped in its own `ADD_LINE` node.
fn parse_normal_new_lines(&mut self) {
    self.builder.start_node(SyntaxKind::NORMAL_NEW_LINES.into());
    while self.current_kind() == Some(SyntaxKind::GREATER_THAN) {
        self.builder.start_node(SyntaxKind::ADD_LINE.into());
        // Emits the `>` marker, the whitespace after it and the rest of the
        // line through the newline.
        self.skip_to_eol();
        self.builder.finish_node();
    }
    self.builder.finish_node();
}
fn parse_file_path(&mut self) {
let mut path_parts = Vec::new();
let mut collecting_path = true;
while !self.at(SyntaxKind::NEWLINE) && !self.at_end() && collecting_path {
match self.current_kind() {
Some(SyntaxKind::TEXT)
| Some(SyntaxKind::SLASH)
| Some(SyntaxKind::DOT)
| Some(SyntaxKind::NUMBER)
| Some(SyntaxKind::MINUS)
| Some(SyntaxKind::STAR)
| Some(SyntaxKind::COLON)
| Some(SyntaxKind::BACKSLASH) => {
if let Some(text) = self.current_text() {
path_parts.push(text.to_string());
}
self.advance_without_emit();
}
Some(SyntaxKind::WHITESPACE) if !path_parts.is_empty() => {
collecting_path = false;
}
_ => {
collecting_path = false;
}
}
}
if !path_parts.is_empty() {
let path = path_parts.join("");
self.builder.token(SyntaxKind::PATH.into(), &path);
}
}
/// Emits tokens up to and including the next newline, or up to the end of
/// input if no newline remains.
fn skip_to_eol(&mut self) {
    while let Some(kind) = self.current_kind() {
        self.advance();
        if kind == SyntaxKind::NEWLINE {
            break;
        }
    }
}
/// Lookahead check for a context-diff hunk separator: at least seven
/// consecutive `*` tokens followed directly by a newline or the end of input.
/// Consumes nothing.
fn looks_like_context_hunk_separator(&self) -> bool {
    let stars = (0..)
        .take_while(|&i| self.peek_kind(i) == Some(SyntaxKind::STAR))
        .count();
    stars >= 7 && matches!(self.peek_kind(stars), Some(SyntaxKind::NEWLINE) | None)
}
/// Lookahead check for a context-diff old-section header.
///
/// The expected shape is `***`, optional whitespace, a number/comma run that
/// starts with a number, optional whitespace, and then at least three `*`
/// tokens. Consumes nothing.
fn looks_like_context_hunk_range(&self) -> bool {
    let at_marker = self.at(SyntaxKind::STAR) && self.peek_text(0) == Some("***");
    if !at_marker {
        return false;
    }

    // Start scanning just past the three marker tokens.
    let mut pos = 3;
    while self.peek_kind(pos) == Some(SyntaxKind::WHITESPACE) {
        pos += 1;
    }

    if self.peek_kind(pos) != Some(SyntaxKind::NUMBER) {
        return false;
    }
    while matches!(
        self.peek_kind(pos),
        Some(SyntaxKind::NUMBER) | Some(SyntaxKind::COMMA)
    ) {
        pos += 1;
    }

    while self.peek_kind(pos) == Some(SyntaxKind::WHITESPACE) {
        pos += 1;
    }

    let trailing_stars = (pos..)
        .take_while(|&i| self.peek_kind(i) == Some(SyntaxKind::STAR))
        .count();
    trailing_stars >= 3
}
/// Lookahead check for a context-diff new-section header.
///
/// The expected shape is `---`, optional whitespace, a number/comma run that
/// starts with a number, optional whitespace, and then at least three `-`
/// tokens. Consumes nothing.
fn looks_like_context_new_section(&self) -> bool {
    let at_marker = self.at(SyntaxKind::MINUS) && self.peek_text(0) == Some("---");
    if !at_marker {
        return false;
    }

    // Start scanning just past the three marker tokens.
    let mut pos = 3;
    while self.peek_kind(pos) == Some(SyntaxKind::WHITESPACE) {
        pos += 1;
    }

    if self.peek_kind(pos) != Some(SyntaxKind::NUMBER) {
        return false;
    }
    while matches!(
        self.peek_kind(pos),
        Some(SyntaxKind::NUMBER) | Some(SyntaxKind::COMMA)
    ) {
        pos += 1;
    }

    while self.peek_kind(pos) == Some(SyntaxKind::WHITESPACE) {
        pos += 1;
    }

    let trailing_dashes = (pos..)
        .take_while(|&i| self.peek_kind(i) == Some(SyntaxKind::MINUS))
        .count();
    trailing_dashes >= 3
}
}
// Test modules. Each is compiled only for test builds and loaded from the
// file named in its `#[path]` attribute.
#[cfg(test)]
#[path = "parse_tests.rs"]
mod tests;
#[cfg(test)]
#[path = "error_recovery_tests.rs"]
mod error_recovery_tests;
#[cfg(test)]
#[path = "additional_tests.rs"]
mod additional_tests;
#[cfg(test)]
#[path = "format_tests.rs"]
mod format_tests;
#[cfg(test)]
#[path = "corner_case_tests.rs"]
mod corner_case_tests;