use crate::glr_incremental::{ForestNode, GLREdit, GLRToken};
use crate::subtree::Subtree;
use adze_ir::SymbolId;
use std::collections::{HashMap, VecDeque};
use std::sync::Arc;
/// Coarse classification of an incremental edit; used by `OptimizedReparser`
/// to pick a class-specific fast path in `optimize_reparse`.
#[derive(Debug, Clone, PartialEq)]
pub enum EditClass {
    /// Zero bytes removed, exactly one byte inserted.
    SingleCharInsertion,
    /// Exactly one byte removed, zero bytes inserted.
    SingleCharDeletion,
    /// Exactly one old token replaced by exactly one new token.
    TokenReplacement,
    /// Replacement text consists of ASCII whitespace.
    WhitespaceOnly,
    /// Replacement text starts with a comment marker ("//", "/*", or "#").
    CommentOnly,
    /// Large edit (more than 50 bytes removed or inserted).
    StructuralChange,
    /// Anything that matches none of the categories above.
    Multiple,
}
impl EditClass {
    /// Classifies `edit` into a coarse category used to pick a reparse
    /// strategy. Checks run from most to least specific; first match wins.
    /// `old_range` is a byte range (it is compared against token byte
    /// offsets elsewhere in this module), so the lengths below are bytes.
    pub fn classify(edit: &GLREdit) -> Self {
        let old_len = edit.old_range.len();
        let new_len = edit.new_text.len();
        if old_len == 0 && new_len == 1 {
            return EditClass::SingleCharInsertion;
        }
        if old_len == 1 && new_len == 0 {
            return EditClass::SingleCharDeletion;
        }
        if Self::is_whitespace_change(&edit.new_text) {
            return EditClass::WhitespaceOnly;
        }
        if Self::is_comment_change(&edit.new_text) {
            return EditClass::CommentOnly;
        }
        if edit.old_token_range.len() == 1 && edit.new_tokens.len() == 1 {
            return EditClass::TokenReplacement;
        }
        // Large edits are assumed to change program structure.
        if old_len > 50 || new_len > 50 {
            return EditClass::StructuralChange;
        }
        EditClass::Multiple
    }

    /// True when `text` is non-empty and entirely ASCII whitespace.
    ///
    /// The non-empty guard is the fix: `Iterator::all` on an empty iterator
    /// is vacuously true, so without it every multi-character deletion
    /// (whose `new_text` is empty) was misclassified as `WhitespaceOnly`,
    /// letting the reparser reuse the entire forest after a structural
    /// deletion.
    fn is_whitespace_change(text: &[u8]) -> bool {
        !text.is_empty() && text.iter().all(|&b| b.is_ascii_whitespace())
    }

    /// True when the replacement text begins with a comment marker.
    /// NOTE(review): grammar-agnostic heuristic ("//", "/*", "#"); confirm
    /// the "#" case against the grammars this reparser actually serves.
    fn is_comment_change(text: &[u8]) -> bool {
        let text_str = String::from_utf8_lossy(text);
        text_str.starts_with("//") || text_str.starts_with("/*") || text_str.starts_with("#")
    }
}
/// Reparse optimizer: classifies incoming edits (via `EditClass`) and applies
/// class-specific fast paths, caching parse results keyed by token symbols.
pub struct OptimizedReparser {
    // LRU cache mapping full token-symbol sequences to parsed subtrees.
    parse_cache: ParseCache,
    // Counters updated by `optimize_reparse` and its handler methods.
    stats: ReparseStats,
}
/// Bounded LRU cache from a full token-symbol sequence to its parsed subtree.
struct ParseCache {
    // Cache storage; the key is the entire token-symbol vector.
    token_cache: HashMap<Vec<SymbolId>, Arc<Subtree>>,
    // Recency order: front = most recently used, back = eviction candidate.
    lru_queue: VecDeque<Vec<SymbolId>>,
    // Maximum number of cached entries before eviction kicks in.
    max_size: usize,
}
impl ParseCache {
    /// Creates an empty cache holding at most `max_size` entries.
    fn new(max_size: usize) -> Self {
        Self {
            token_cache: HashMap::new(),
            lru_queue: VecDeque::new(),
            max_size,
        }
    }

    /// Looks up a subtree by its full token-symbol sequence, promoting the
    /// key to most-recently-used on a hit.
    ///
    /// NOTE: the `retain` promotion is O(queue length); acceptable at the
    /// current capacity (1000) but worth an indexed LRU if the cache grows.
    fn get(&mut self, tokens: &[SymbolId]) -> Option<Arc<Subtree>> {
        if let Some(subtree) = self.token_cache.get(tokens) {
            self.lru_queue.retain(|t| t != tokens);
            self.lru_queue.push_front(tokens.to_vec());
            Some(subtree.clone())
        } else {
            None
        }
    }

    /// Inserts (or refreshes) a cache entry, evicting the least recently
    /// used entry only when a *new* key would exceed `max_size`.
    fn insert(&mut self, tokens: Vec<SymbolId>, subtree: Arc<Subtree>) {
        if self.token_cache.contains_key(&tokens) {
            // Fix: re-inserting an existing key must drop its old queue slot
            // instead of pushing a duplicate. Previously the duplicate meant
            // a later eviction could pop the stale back entry and remove the
            // live cache entry while the front duplicate still claimed it
            // existed, leaving queue and map inconsistent (and possibly
            // evicting an unrelated entry on the re-insert itself).
            self.lru_queue.retain(|t| t != &tokens);
        } else if self.token_cache.len() >= self.max_size {
            if let Some(old_tokens) = self.lru_queue.pop_back() {
                self.token_cache.remove(&old_tokens);
            }
        }
        self.token_cache.insert(tokens.clone(), subtree);
        self.lru_queue.push_front(tokens);
    }
}
/// Counters recording how incremental reparses were resolved; read via
/// `OptimizedReparser::stats` and cleared via `reset_stats`.
#[derive(Debug, Default)]
pub struct ReparseStats {
    pub total_reparses: usize,     // incremented on every `optimize_reparse` call
    pub cache_hits: usize,         // parse-cache hits in the insertion/deletion path
    pub cache_misses: usize,       // parse-cache misses in the same path
    pub subtrees_reused: usize,    // reuse events (whitespace path adds the token count)
    pub full_reparses: usize,      // edits with no fast path (caller must reparse fully)
    pub optimized_reparses: usize, // edits resolved by any fast path
}
impl OptimizedReparser {
pub fn new() -> Self {
Self {
parse_cache: ParseCache::new(1000),
stats: ReparseStats::default(),
}
}
pub fn optimize_reparse(
&mut self,
edit: &GLREdit,
tokens: &[GLRToken],
reuse_map: &ReuseMap,
) -> Option<Arc<ForestNode>> {
self.stats.total_reparses += 1;
let edit_class = EditClass::classify(edit);
match edit_class {
EditClass::SingleCharInsertion => self.handle_char_insertion(edit, tokens, reuse_map),
EditClass::SingleCharDeletion => self.handle_char_deletion(edit, tokens, reuse_map),
EditClass::TokenReplacement => self.handle_token_replacement(edit, tokens, reuse_map),
EditClass::WhitespaceOnly => self.handle_whitespace_change(edit, tokens, reuse_map),
EditClass::CommentOnly => self.handle_comment_change(edit, tokens, reuse_map),
_ => {
self.stats.full_reparses += 1;
None
}
}
}
fn handle_char_insertion(
&mut self,
edit: &GLREdit,
tokens: &[GLRToken],
reuse_map: &ReuseMap,
) -> Option<Arc<ForestNode>> {
let affected_token_idx = tokens.iter().position(|t| {
t.start_byte <= edit.old_range.start && t.end_byte > edit.old_range.start
})?;
let _affected_token = &tokens[affected_token_idx];
let token_symbols: Vec<SymbolId> = tokens.iter().map(|t| t.symbol).collect();
if let Some(cached) = self.parse_cache.get(&token_symbols) {
self.stats.cache_hits += 1;
self.stats.optimized_reparses += 1;
return Some(self.adjust_forest_offsets(cached, edit));
}
self.stats.cache_misses += 1;
if self.can_reuse_surrounding_subtrees(edit, reuse_map) {
self.stats.subtrees_reused += 1;
self.stats.optimized_reparses += 1;
return self.reparse_minimal_region(edit, tokens, reuse_map);
}
None
}
fn handle_char_deletion(
&mut self,
edit: &GLREdit,
tokens: &[GLRToken],
reuse_map: &ReuseMap,
) -> Option<Arc<ForestNode>> {
self.handle_char_insertion(edit, tokens, reuse_map)
}
fn handle_token_replacement(
&mut self,
edit: &GLREdit,
tokens: &[GLRToken],
reuse_map: &ReuseMap,
) -> Option<Arc<ForestNode>> {
if edit.old_token_range.len() != 1 || edit.new_tokens.len() != 1 {
return None;
}
let token_idx = edit.old_token_range.start;
if token_idx < tokens.len() && tokens[token_idx].symbol == edit.new_tokens[0].symbol {
self.stats.optimized_reparses += 1;
return self.replace_single_token(token_idx, &edit.new_tokens[0], tokens, reuse_map);
}
None
}
fn handle_whitespace_change(
&mut self,
_edit: &GLREdit,
tokens: &[GLRToken],
reuse_map: &ReuseMap,
) -> Option<Arc<ForestNode>> {
self.stats.optimized_reparses += 1;
self.stats.subtrees_reused += tokens.len();
self.get_existing_forest(reuse_map)
}
fn handle_comment_change(
&mut self,
edit: &GLREdit,
tokens: &[GLRToken],
reuse_map: &ReuseMap,
) -> Option<Arc<ForestNode>> {
self.handle_whitespace_change(edit, tokens, reuse_map)
}
fn can_reuse_surrounding_subtrees(&self, edit: &GLREdit, reuse_map: &ReuseMap) -> bool {
let before_range = 0..edit.old_range.start;
let after_range = edit.old_range.end..usize::MAX;
!reuse_map.is_affected(&before_range) && !reuse_map.is_affected(&after_range)
}
fn reparse_minimal_region(
&self,
_edit: &GLREdit,
_tokens: &[GLRToken],
_reuse_map: &ReuseMap,
) -> Option<Arc<ForestNode>> {
None
}
fn replace_single_token(
&self,
_token_idx: usize,
_new_token: &GLRToken,
_tokens: &[GLRToken],
_reuse_map: &ReuseMap,
) -> Option<Arc<ForestNode>> {
None
}
fn adjust_forest_offsets(&self, _subtree: Arc<Subtree>, _edit: &GLREdit) -> Arc<ForestNode> {
Arc::new(ForestNode {
symbol: SymbolId(0),
alternatives: vec![],
byte_range: 0..0,
token_range: 0..0,
cached_subtree: None,
})
}
fn get_existing_forest(&self, _reuse_map: &ReuseMap) -> Option<Arc<ForestNode>> {
None
}
pub fn stats(&self) -> &ReparseStats {
&self.stats
}
pub fn reset_stats(&mut self) {
self.stats = ReparseStats::default();
}
}
/// Finds a safe token range to reparse around an edit by expanding to
/// statement boundaries and then balancing delimiters (`find_boundaries`).
pub struct BoundaryDetector {
    // Grammar-specific symbol sets consulted during boundary search.
    grammar_info: GrammarInfo,
}
/// Symbol sets that drive boundary detection.
/// NOTE(review): `BoundaryDetector::new` constructs all three empty, so the
/// scans degrade to (0, tokens.len()) fallbacks — presumably these are meant
/// to be populated from the grammar; confirm the intended source.
#[derive(Debug)]
struct GrammarInfo {
    // Symbols that may begin a statement (scanned backwards for).
    statement_starters: Vec<SymbolId>,
    // Symbols that may end a statement (scanned forwards for).
    statement_enders: Vec<SymbolId>,
    // (opener, closer) symbol pairs used when balancing delimiters.
    delimiter_pairs: Vec<(SymbolId, SymbolId)>,
}
impl BoundaryDetector {
pub fn new() -> Self {
Self {
grammar_info: GrammarInfo {
statement_starters: vec![],
statement_enders: vec![],
delimiter_pairs: vec![],
},
}
}
pub fn find_boundaries(&self, edit: &GLREdit, tokens: &[GLRToken]) -> (usize, usize) {
let mut start_boundary = edit.old_token_range.start;
let mut end_boundary = edit.old_token_range.end;
start_boundary = self.find_statement_start(start_boundary, tokens);
end_boundary = self.find_statement_end(end_boundary, tokens);
self.balance_delimiters(start_boundary, end_boundary, tokens)
}
fn find_statement_start(&self, from: usize, tokens: &[GLRToken]) -> usize {
for i in (0..from.min(tokens.len())).rev() {
if self
.grammar_info
.statement_starters
.contains(&tokens[i].symbol)
{
return i;
}
}
0
}
fn find_statement_end(&self, from: usize, tokens: &[GLRToken]) -> usize {
for i in from..tokens.len() {
if self
.grammar_info
.statement_enders
.contains(&tokens[i].symbol)
{
return i + 1;
}
}
tokens.len()
}
fn balance_delimiters(
&self,
mut start: usize,
mut end: usize,
tokens: &[GLRToken],
) -> (usize, usize) {
let mut delimiter_stack = Vec::new();
for i in start..end.min(tokens.len()) {
let symbol = tokens[i].symbol;
for (open, close) in &self.grammar_info.delimiter_pairs {
if symbol == *open {
delimiter_stack.push(*close);
} else if symbol == *close {
if delimiter_stack.last() == Some(close) {
delimiter_stack.pop();
} else {
start = self.find_matching_opener(i, tokens, *open).unwrap_or(0);
}
}
}
}
if !delimiter_stack.is_empty() {
end = self
.find_closers(end, &delimiter_stack, tokens)
.unwrap_or(tokens.len());
}
(start, end)
}
fn find_matching_opener(
&self,
from: usize,
tokens: &[GLRToken],
opener: SymbolId,
) -> Option<usize> {
for i in (0..from).rev() {
if tokens[i].symbol == opener {
return Some(i);
}
}
None
}
fn find_closers(
&self,
from: usize,
closers: &[SymbolId],
tokens: &[GLRToken],
) -> Option<usize> {
let mut remaining = closers.to_vec();
for i in from..tokens.len() {
if let Some(pos) = remaining.iter().position(|&c| c == tokens[i].symbol) {
remaining.remove(pos);
if remaining.is_empty() {
return Some(i + 1);
}
}
}
None
}
}
#[cfg(test)]
mod tests {
    use super::*;
    // Fix: `SubtreeNode` is constructed in `test_parse_cache` below but was
    // never imported anywhere in this file, so the test module failed to
    // compile. TODO(review): confirm `SubtreeNode` is exported from
    // `crate::subtree` alongside `Subtree`.
    use crate::subtree::SubtreeNode;

    #[test]
    fn test_edit_classification() {
        // Zero bytes replaced by one byte -> single-character insertion.
        let edit = GLREdit {
            old_range: 5..5,
            new_text: b"x".to_vec(),
            old_token_range: 0..0,
            new_tokens: vec![],
            old_tokens: vec![],
            old_forest: None,
        };
        assert_eq!(EditClass::classify(&edit), EditClass::SingleCharInsertion);
        // One byte replaced by nothing -> single-character deletion.
        let edit = GLREdit {
            old_range: 5..6,
            new_text: vec![],
            old_token_range: 0..0,
            new_tokens: vec![],
            old_tokens: vec![],
            old_forest: None,
        };
        assert_eq!(EditClass::classify(&edit), EditClass::SingleCharDeletion);
        // Non-empty all-whitespace replacement -> whitespace-only.
        let edit = GLREdit {
            old_range: 5..10,
            new_text: b" \n\t ".to_vec(),
            old_token_range: 0..0,
            new_tokens: vec![],
            old_tokens: vec![],
            old_forest: None,
        };
        assert_eq!(EditClass::classify(&edit), EditClass::WhitespaceOnly);
        // Replacement beginning with "//" -> comment-only.
        let edit = GLREdit {
            old_range: 5..10,
            new_text: b"// comment".to_vec(),
            old_token_range: 0..0,
            new_tokens: vec![],
            old_tokens: vec![],
            old_forest: None,
        };
        assert_eq!(EditClass::classify(&edit), EditClass::CommentOnly);
    }

    #[test]
    fn test_parse_cache() {
        // Capacity-2 cache; exercises LRU promotion and eviction order.
        let mut cache = ParseCache::new(2);
        let tokens1 = vec![SymbolId(1), SymbolId(2)];
        let tokens2 = vec![SymbolId(3), SymbolId(4)];
        let tokens3 = vec![SymbolId(5), SymbolId(6)];
        let node1 = SubtreeNode {
            symbol_id: SymbolId(1),
            is_error: false,
            byte_range: 0..10,
        };
        let node2 = SubtreeNode {
            symbol_id: SymbolId(2),
            is_error: false,
            byte_range: 10..20,
        };
        let node3 = SubtreeNode {
            symbol_id: SymbolId(3),
            is_error: false,
            byte_range: 20..30,
        };
        let subtree1 = Arc::new(Subtree::new(node1, vec![]));
        let subtree2 = Arc::new(Subtree::new(node2, vec![]));
        let subtree3 = Arc::new(Subtree::new(node3, vec![]));
        cache.insert(tokens1.clone(), subtree1.clone());
        cache.insert(tokens2.clone(), subtree2.clone());
        // Touch tokens1 so that tokens2 becomes the eviction candidate.
        assert!(cache.get(&tokens1).is_some());
        cache.insert(tokens3.clone(), subtree3.clone());
        assert!(cache.get(&tokens1).is_some());
        // tokens2 was least recently used and must have been evicted.
        assert!(cache.get(&tokens2).is_none());
        assert!(cache.get(&tokens3).is_some());
    }

    #[test]
    fn test_boundary_detector() {
        // With an empty GrammarInfo the boundaries fall back to the scan
        // limits, which still bracket the edited token range.
        let detector = BoundaryDetector::new();
        let tokens = vec![
            GLRToken {
                symbol: SymbolId(1),
                text: b"if".to_vec(),
                start_byte: 0,
                end_byte: 2,
            },
            GLRToken {
                symbol: SymbolId(2),
                text: b"(".to_vec(),
                start_byte: 3,
                end_byte: 4,
            },
            GLRToken {
                symbol: SymbolId(3),
                text: b"x".to_vec(),
                start_byte: 4,
                end_byte: 5,
            },
            GLRToken {
                symbol: SymbolId(4),
                text: b")".to_vec(),
                start_byte: 5,
                end_byte: 6,
            },
        ];
        let edit = GLREdit {
            old_range: 4..5,
            new_text: b"y".to_vec(),
            old_token_range: 2..3,
            new_tokens: vec![],
            old_tokens: vec![],
            old_forest: None,
        };
        let (start, end) = detector.find_boundaries(&edit, &tokens);
        assert!(start <= 2);
        assert!(end >= 3);
    }
}