use ropey::Rope;
use crate::{
Lexer, Module, Shared, Token,
cst::{
node::Node,
parser::{ErrorReporter, Parser},
},
lexer,
};
/// If at least this fraction of cached nodes is affected by an edit,
/// `IncrementalParser::update` abandons incremental splicing and does a
/// full reparse instead.
const FULL_PARSE_THRESHOLD: f64 = 0.7;
/// A single text edit expressed in **character** offsets (not bytes):
/// the range `start..end` of the current source is replaced by `new_text`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TextEdit {
    /// Inclusive character offset where the replaced range begins.
    pub start: usize,
    /// Exclusive character offset where the replaced range ends.
    pub end: usize,
    /// Replacement text inserted at `start`.
    pub new_text: String,
}
impl TextEdit {
pub fn new(start: usize, end: usize, new_text: impl Into<String>) -> Self {
Self {
start,
end,
new_text: new_text.into(),
}
}
}
/// A parser that caches its token stream and CST so that small edits can be
/// applied by reparsing only the affected region.
pub struct IncrementalParser {
    /// Current source text.
    source: Rope,
    /// Token stream lexed from `source`.
    tokens: Vec<Shared<Token>>,
    /// Top-level CST nodes parsed from `tokens`.
    nodes: Vec<Shared<Node>>,
    /// Per-node `(start, end)` token indices; kept parallel to `nodes`
    /// (both are always spliced over the same range together).
    node_token_ranges: Vec<(usize, usize)>,
    /// Diagnostics collected by the most recent parse.
    errors: ErrorReporter,
}
impl IncrementalParser {
    /// Lexes and fully parses `source`, caching the token stream, the CST
    /// nodes, their token ranges, and any diagnostics.
    pub fn new(source: &str) -> Self {
        let tokens = Self::lex(source);
        let (nodes, node_token_ranges, errors) = Self::parse_tokens(&tokens);
        Self {
            source: Rope::from_str(source),
            tokens,
            nodes,
            node_token_ranges,
            errors,
        }
    }

    /// Returns the current source text as an owned `String`.
    pub fn source(&self) -> String {
        self.source.to_string()
    }

    /// Length of the current source in bytes.
    pub fn byte_len(&self) -> usize {
        self.source.len_bytes()
    }

    /// Length of the current source in characters.
    pub fn char_len(&self) -> usize {
        self.source.len_chars()
    }

    /// Converts a byte offset into a character offset.
    ///
    /// Returns `None` when the offset is past the end of the source or does
    /// not fall on a UTF-8 character boundary.
    pub fn byte_to_char_offset(&self, byte_offset: usize) -> Option<usize> {
        let len = self.source.len_bytes();
        if byte_offset > len {
            return None;
        }
        if !is_rope_char_boundary(&self.source, byte_offset) {
            return None;
        }
        Some(self.source.byte_to_char(byte_offset))
    }

    /// Converts a character offset into a byte offset, or `None` when the
    /// offset is past the end of the source.
    pub fn char_to_byte_offset(&self, char_offset: usize) -> Option<usize> {
        if char_offset > self.source.len_chars() {
            return None;
        }
        Some(self.source.char_to_byte(char_offset))
    }

    /// Current CST nodes and diagnostics without reparsing.
    pub fn result(&self) -> (&[Shared<Node>], &ErrorReporter) {
        (&self.nodes, &self.errors)
    }

    /// Replaces the source with `new_source`, reusing cached nodes outside
    /// the changed token region where possible.
    pub fn update(&mut self, new_source: &str) -> (&[Shared<Node>], &ErrorReporter) {
        let new_tokens = Self::lex(new_source);
        // Longest common prefix of the old and new token streams, compared
        // by token kind only (see `tokens_same_kind`).
        let prefix = self
            .tokens
            .iter()
            .zip(new_tokens.iter())
            .take_while(|(a, b)| tokens_same_kind(a, b))
            .count();
        // Longest common suffix, likewise by kind.
        let old_suffix = self
            .tokens
            .iter()
            .rev()
            .zip(new_tokens.iter().rev())
            .take_while(|(a, b)| tokens_same_kind(a, b))
            .count();
        // First token index past the changed region, in old and new
        // stream coordinates respectively.
        let old_changed_end = self.tokens.len().saturating_sub(old_suffix);
        let new_changed_end = new_tokens.len().saturating_sub(old_suffix);
        // Prefix covers the whole changed region in both streams: no token
        // kind changed, so the cached nodes are kept as-is.
        // NOTE(review): this assumes equal kinds imply equal token content;
        // the retained nodes still reference the *old* token objects even
        // though `self.tokens` is replaced below — confirm `Token::kind`
        // carries the token text.
        if prefix >= old_changed_end && prefix >= new_changed_end {
            self.source = Rope::from_str(new_source);
            self.tokens = new_tokens;
            return (&self.nodes, &self.errors);
        }
        // Nodes whose token range overlaps `[prefix, old_changed_end)`.
        // `node_token_ranges` is assumed sorted by token position, so both
        // bounds are found with `partition_point`.
        let first_affected = self.node_token_ranges.partition_point(|&(_, end)| end <= prefix);
        let last_affected = self
            .node_token_ranges
            .partition_point(|&(start, _)| start < old_changed_end);
        let total_nodes = self.nodes.len();
        let affected_nodes = last_affected.saturating_sub(first_affected);
        // When most nodes are affected anyway, a full reparse is simpler.
        let should_full_parse =
            total_nodes == 0 || (affected_nodes as f64 / total_nodes as f64) >= FULL_PARSE_THRESHOLD;
        if should_full_parse {
            return self.full_parse(new_source, new_tokens);
        }
        // Token window to reparse, in old-stream coordinates: from the start
        // of the first affected node to the end of the last affected one.
        let reparse_old_start = self
            .node_token_ranges
            .get(first_affected)
            .map(|r| r.0)
            .unwrap_or(prefix);
        let reparse_old_end = if last_affected > 0 {
            self.node_token_ranges
                .get(last_affected - 1)
                .map(|r| r.1)
                .unwrap_or(old_changed_end)
        } else {
            old_changed_end
        };
        // Translate the window's end into new-stream coordinates using the
        // net change in token count; clamp so the window is never inverted.
        let reparse_new_start = reparse_old_start;
        let delta = new_tokens.len() as isize - self.tokens.len() as isize;
        let reparse_new_end = ((reparse_old_end as isize) + delta).max(reparse_new_start as isize) as usize;
        let (new_nodes, new_ranges, _) = Self::parse_tokens_range(&new_tokens, reparse_new_start, reparse_new_end);
        // Shift the token ranges of every node after the reparsed window.
        // NOTE(review): a negative `delta` larger than `range.0` would wrap
        // on the `as usize` cast; confirm suffix ranges always start at or
        // after `old_changed_end` so this cannot happen.
        for range in &mut self.node_token_ranges[last_affected..] {
            range.0 = ((range.0 as isize) + delta) as usize;
            range.1 = ((range.1 as isize) + delta) as usize;
        }
        // Replace the affected nodes and their ranges in lockstep.
        self.nodes.splice(first_affected..last_affected, new_nodes);
        self.node_token_ranges.splice(first_affected..last_affected, new_ranges);
        self.source = Rope::from_str(new_source);
        self.tokens = new_tokens;
        // Diagnostics are refreshed with a parse of the whole new token
        // stream. NOTE(review): this costs a full reparse even on the
        // incremental path; consider tracking diagnostics per node.
        let (_, _, errors) = Self::parse_tokens(&self.tokens);
        self.errors = errors;
        (&self.nodes, &self.errors)
    }

    /// Applies a character-offset `TextEdit` to the source and reparses.
    ///
    /// Returns `Err` when `start > end` or `end` is beyond the source's
    /// character length; the source is left untouched in that case.
    pub fn apply_edit(&mut self, edit: &TextEdit) -> Result<(&[Shared<Node>], &ErrorReporter), String> {
        if edit.start > edit.end {
            return Err(format!("TextEdit: start ({}) > end ({})", edit.start, edit.end));
        }
        let char_len = self.source.len_chars();
        if edit.end > char_len {
            return Err(format!(
                "TextEdit: end ({}) out of range (source is {} chars)",
                edit.end, char_len
            ));
        }
        // Rope edits are char-indexed, matching `TextEdit`'s offsets.
        self.source.remove(edit.start..edit.end);
        self.source.insert(edit.start, &edit.new_text);
        let new_source = self.source.to_string();
        Ok(self.update(&new_source))
    }

    /// Discards all cached state and reparses `new_tokens` from scratch.
    fn full_parse(&mut self, new_source: &str, new_tokens: Vec<Shared<Token>>) -> (&[Shared<Node>], &ErrorReporter) {
        let (nodes, node_token_ranges, errors) = Self::parse_tokens(&new_tokens);
        self.source = Rope::from_str(new_source);
        self.tokens = new_tokens;
        self.nodes = nodes;
        self.node_token_ranges = node_token_ranges;
        self.errors = errors;
        (&self.nodes, &self.errors)
    }

    /// Tokenizes `source`, keeping whitespace tokens and suppressing lexer
    /// errors; a failed tokenize yields an empty stream.
    fn lex(source: &str) -> Vec<Shared<Token>> {
        Lexer::new(lexer::Options {
            ignore_errors: true,
            include_spaces: true,
        })
        .tokenize(source, Module::TOP_LEVEL_MODULE_ID)
        .unwrap_or_default()
        .into_iter()
        .map(Shared::new)
        .collect()
    }

    /// Parses a full token stream, returning nodes, their token ranges,
    /// and diagnostics.
    fn parse_tokens(tokens: &[Shared<Token>]) -> (Vec<Shared<Node>>, Vec<(usize, usize)>, ErrorReporter) {
        let mut parser = Parser::new(tokens);
        parser.parse_with_ranges()
    }

    /// Parses only `tokens[start..end]`, with `end` clamped to the stream
    /// length. The returned token ranges are shifted back into whole-stream
    /// coordinates by adding `start`.
    fn parse_tokens_range(
        tokens: &[Shared<Token>],
        start: usize,
        end: usize,
    ) -> (Vec<Shared<Node>>, Vec<(usize, usize)>, ErrorReporter) {
        let end = end.min(tokens.len());
        if start >= end {
            return (Vec::new(), Vec::new(), ErrorReporter::default());
        }
        let sub_slice = &tokens[start..end];
        let mut parser = Parser::new(sub_slice);
        let (nodes, ranges, errors) = parser.parse_with_ranges();
        // The sub-parser counts from 0; re-anchor ranges at `start`.
        let adjusted: Vec<(usize, usize)> = ranges.into_iter().map(|(s, e)| (s + start, e + start)).collect();
        (nodes, adjusted, errors)
    }
}
/// Reports whether `byte_idx` falls on a UTF-8 character boundary of `rope`.
///
/// The very start and end of the rope are always boundaries; offsets past
/// the end never are. Any other offset is a boundary unless the byte there
/// is a UTF-8 continuation byte.
fn is_rope_char_boundary(rope: &Rope, byte_idx: usize) -> bool {
    let total_bytes = rope.len_bytes();
    if byte_idx > total_bytes {
        return false;
    }
    if byte_idx == 0 || byte_idx == total_bytes {
        return true;
    }
    // Continuation bytes have the bit pattern 0b10xx_xxxx.
    (rope.byte(byte_idx) & 0xC0) != 0x80
}
/// Two tokens are treated as "unchanged" when their `kind` fields are equal.
/// NOTE(review): this compares `kind` only — if `Token::kind` does not carry
/// the token's text, an edit that changes text without changing kind would be
/// invisible to `update`'s prefix/suffix diff; confirm kind equality implies
/// textual equality.
fn tokens_same_kind(a: &Shared<Token>, b: &Shared<Token>) -> bool {
    a.kind == b.kind
}
#[cfg(test)]
mod tests {
    use super::*;

    // Updating with identical source must keep the cached node count.
    #[test]
    fn test_incremental_no_change() {
        let source = "upcase() | downcase()";
        let mut parser = IncrementalParser::new(source);
        let (nodes_before, _) = parser.result();
        let count_before = nodes_before.len();
        parser.update(source);
        let (nodes_after, _) = parser.result();
        assert_eq!(nodes_after.len(), count_before);
    }

    // Appending a pipeline stage parses cleanly through `update`.
    #[test]
    fn test_incremental_append() {
        let source = "upcase()";
        let mut parser = IncrementalParser::new(source);
        let (nodes, errors) = parser.update("upcase() | downcase()");
        assert!(!errors.has_errors());
        assert!(!nodes.is_empty());
    }

    // Editing inside a `def` body keeps the parse error-free.
    #[test]
    fn test_incremental_def_change() {
        let source = "def foo(): \"hello\" end\n| upcase()";
        let mut parser = IncrementalParser::new(source);
        let (_, errors) = parser.result();
        assert!(!errors.has_errors());
        let (_, errors) = parser.update("def foo(): \"world\" end\n| upcase()");
        assert!(!errors.has_errors());
    }

    // `TextEdit` offsets are character offsets, so replacing 5 Japanese
    // characters is a 5-wide range regardless of byte width.
    #[test]
    fn test_multibyte_char_edit() {
        let source = "\"こんにちは\" | upcase()";
        let mut parser = IncrementalParser::new(source);
        let (_, errors) = parser.result();
        assert!(!errors.has_errors());
        let edit = TextEdit::new(1, 6, "世界");
        let (_, errors) = parser.apply_edit(&edit).unwrap();
        assert!(!errors.has_errors());
        assert_eq!(parser.source(), "\"世界\" | upcase()");
    }

    // Byte offsets inside a multibyte character map to `None`.
    #[test]
    fn test_multibyte_byte_to_char_offset() {
        let source = "# こんにちは";
        let parser = IncrementalParser::new(source);
        assert_eq!(parser.byte_to_char_offset(0), Some(0));
        // Byte 2 starts "こ" (a boundary); byte 3 is mid-character.
        assert_eq!(parser.byte_to_char_offset(2), Some(2));
        assert_eq!(parser.byte_to_char_offset(3), None);
        assert_eq!(parser.byte_to_char_offset(5), Some(3));
    }

    // Char offsets map to the first byte of each character.
    #[test]
    fn test_multibyte_char_to_byte_offset() {
        let source = "# こんにちは";
        let parser = IncrementalParser::new(source);
        assert_eq!(parser.char_to_byte_offset(0), Some(0));
        assert_eq!(parser.char_to_byte_offset(2), Some(2));
        assert_eq!(parser.char_to_byte_offset(3), Some(5));
        assert_eq!(parser.char_to_byte_offset(7), Some(17));
    }

    // An edit whose end is past the source's char length is rejected.
    #[test]
    fn test_apply_char_edit_out_of_range() {
        let source = "abc";
        let mut parser = IncrementalParser::new(source);
        let edit = TextEdit::new(0, 10, "x");
        assert!(parser.apply_edit(&edit).is_err());
    }

    // Rewriting nearly the whole source crosses FULL_PARSE_THRESHOLD and
    // exercises the full-reparse fallback.
    #[test]
    fn test_full_parse_fallback_triggered() {
        let source = "upcase() | downcase() | ltrim() | rtrim() | length()";
        let mut parser = IncrementalParser::new(source);
        let (_, errors) = parser.result();
        assert!(!errors.has_errors());
        let new_source = "starts_with(\"x\") | ends_with(\"y\") | contains(\"z\")";
        let (nodes, errors) = parser.update(new_source);
        assert!(!errors.has_errors());
        assert!(!nodes.is_empty());
        assert_eq!(parser.source(), new_source);
    }

    // With zero cached nodes, `update` must take the full-parse path.
    #[test]
    fn test_full_parse_on_empty_nodes() {
        let mut parser = IncrementalParser::new("");
        let (nodes, errors) = parser.update("upcase()");
        assert!(!errors.has_errors());
        assert!(!nodes.is_empty());
    }
}