use super::{Range, Result};
use crate::core::errors::EditorError;
use ass_core::parser::{script::ScriptDeltaOwned, Script};
#[cfg(feature = "std")]
use std::borrow::Cow;
#[cfg(not(feature = "std"))]
use alloc::{borrow::Cow, format, string::String, string::ToString};
#[cfg(not(feature = "std"))]
use alloc::vec::Vec;
/// A single text edit recorded against a document.
///
/// Stores the edited range together with the replacement text and the text
/// that was replaced.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DocumentChange<'a> {
/// Range of the document covered by the edit.
pub range: Range,
/// Text written in place of `range`.
pub new_text: Cow<'a, str>,
/// Text that occupied `range` before the edit.
pub old_text: Cow<'a, str>,
/// Moment the change was recorded; only present with the `std` feature.
#[cfg(feature = "std")]
pub timestamp: std::time::Instant,
/// Identifier assigned to the change when it was recorded.
pub change_id: u64,
}
/// Keeps a cached copy of the script text and bookkeeping about the edits
/// applied to it, and decides between incremental and full reparsing.
#[derive(Debug)]
pub struct IncrementalParser {
/// Script text currently held in the cache; `None` until a cache is
/// initialised or a full reparse succeeds.
cached_script: Option<String>,
/// Changes recorded since the cache was last rebuilt or cleared.
pending_changes: Vec<DocumentChange<'static>>,
/// Identifier given to the next recorded change; starts at 1.
next_change_id: u64,
/// Level of `bytes_changed` at or above which a full reparse is used.
reparse_threshold: usize,
/// Running sum of the absolute length differences between new and old
/// text of the recorded changes.
bytes_changed: usize,
}
impl Default for IncrementalParser {
/// Equivalent to [`IncrementalParser::new`].
fn default() -> Self {
Self::new()
}
}
impl IncrementalParser {
/// Creates a parser with no cached script and no recorded changes.
///
/// Change ids start at 1 and the reparse threshold starts at 10 000
/// changed bytes.
pub fn new() -> Self {
    // Initial budget of accumulated byte changes before a full reparse.
    const DEFAULT_REPARSE_THRESHOLD: usize = 10_000;

    let pending_changes = Vec::new();
    Self {
        cached_script: None,
        pending_changes,
        next_change_id: 1,
        reparse_threshold: DEFAULT_REPARSE_THRESHOLD,
        bytes_changed: 0,
    }
}
/// Sets the level of accumulated byte changes at which a full reparse is
/// triggered.
///
/// The value takes effect on the next call that consults the threshold;
/// the current byte counter is left untouched.
pub fn set_reparse_threshold(&mut self, threshold: usize) {
self.reparse_threshold = threshold;
}
/// Replaces the cached script text with a copy of `content`.
///
/// All pending changes are discarded and the byte counter is reset to
/// zero. The text is stored as given; it is not parsed here, and the
/// change-id counter is not reset.
pub fn initialize_cache(&mut self, content: &str) {
    // Drop the bookkeeping that belonged to the previous cache first.
    self.bytes_changed = 0;
    self.pending_changes.clear();
    self.cached_script = Some(String::from(content));
}
/// Returns `true` when script text is currently held in the cache.
pub fn has_cached_script(&self) -> bool {
self.cached_script.is_some()
}
/// Parses the cached text into a [`Script`] and passes it to `f`.
///
/// The cached text is parsed afresh on every call; the parsed script only
/// lives for the duration of the closure.
///
/// # Errors
///
/// Returns a command-failed error when nothing is cached, the converted
/// parse error when the cached text cannot be parsed, or whatever error
/// `f` itself returns.
pub fn with_cached_script<F, R>(&self, f: F) -> Result<R>
where
    F: FnOnce(&Script) -> Result<R>,
{
    let source = match self.cached_script.as_ref() {
        Some(text) => text,
        None => {
            return Err(EditorError::command_failed("No cached script available"));
        }
    };
    let parsed = Script::parse(source).map_err(EditorError::from)?;
    f(&parsed)
}
/// Applies a single edit and returns the resulting script delta.
///
/// `document_text` is the text the edit is expressed against: `range` is
/// validated against it and the replaced text is read from it. The edit is
/// first attempted incrementally against the cached script; if the partial
/// parse fails, the parser falls back to [`Self::full_reparse`] of
/// `document_text`.
///
/// A full reparse is also performed, before any range validation, when no
/// cache exists yet or when the accumulated byte counter has reached the
/// reparse threshold.
///
/// The change is added to the pending list and counted in the byte counter
/// only after the cached text has actually been updated, so a call that
/// returns an error never leaves behind a change that was not applied.
///
/// # Errors
///
/// * [`EditorError::InvalidRange`] if `range` is reversed or reaches past
///   the end of `document_text`.
/// * A command-failed error if either end of `range` is not on a UTF-8
///   character boundary of `document_text`.
/// * Any error produced while parsing the cached text, updating the cache,
///   or running the fallback full reparse.
pub fn apply_change(
    &mut self,
    document_text: &str,
    range: Range,
    new_text: &str,
) -> Result<ScriptDeltaOwned> {
    // NOTE(review): both full-reparse paths below parse `document_text` as
    // given, without splicing `new_text` into it, while `old_text` is also
    // read from `document_text`. Confirm against the callers which version
    // of the document they pass in.
    if self.cached_script.is_none() || self.bytes_changed >= self.reparse_threshold {
        return self.full_reparse(document_text);
    }

    let (start_byte, end_byte) = (range.start.offset, range.end.offset);
    if end_byte > document_text.len() || start_byte > end_byte {
        return Err(EditorError::InvalidRange {
            start: start_byte,
            end: end_byte,
            length: document_text.len(),
        });
    }

    // `is_char_boundary` already accepts offset 0 and `len()`, so the bounds
    // check above leaves no special cases to handle here.
    if !document_text.is_char_boundary(start_byte) || !document_text.is_char_boundary(end_byte)
    {
        return Err(EditorError::command_failed(
            "Edit range is not on valid UTF-8 character boundaries",
        ));
    }

    let old_text = &document_text[start_byte..end_byte];
    let change_size = new_text.len().abs_diff(old_text.len());
    let change = DocumentChange {
        range,
        new_text: Cow::Owned(new_text.to_string()),
        old_text: Cow::Owned(old_text.to_string()),
        #[cfg(feature = "std")]
        timestamp: std::time::Instant::now(),
        change_id: self.next_change_id,
    };
    // The id is consumed even if the edit fails later, so ids stay unique.
    self.next_change_id += 1;

    let cached = self.cached_script.as_ref().ok_or_else(|| {
        EditorError::command_failed("Cached script unavailable for incremental parsing")
    })?;
    let script = Script::parse(cached).map_err(EditorError::from)?;

    match script.parse_partial(start_byte..end_byte, new_text) {
        Ok(delta) => {
            self.update_cached_script(range, new_text)?;
            // Record the change only now that the cache reflects it.
            self.bytes_changed += change_size;
            self.pending_changes.push(change);
            Ok(delta)
        }
        Err(_e) => {
            #[cfg(feature = "std")]
            eprintln!("Incremental parse failed, falling back to full parse: {_e}");
            // The full reparse clears the pending list and byte counter.
            self.full_reparse(document_text)
        }
    }
}
/// Parses `content` from scratch and makes it the new cached text.
///
/// When a cache already exists, the previous cached text is parsed as well
/// and the returned delta describes the difference between the two
/// scripts. Without a cache, every section of the new script is reported
/// as added. Sections are rendered through their `Debug` representation,
/// and the issues of the new script are always included.
///
/// On success the pending changes are cleared and the byte counter is
/// reset to zero.
///
/// # Errors
///
/// Returns the converted parse error if `content` or the previously cached
/// text cannot be parsed; the parser state is left unchanged in that case.
pub fn full_reparse(&mut self, content: &str) -> Result<ScriptDeltaOwned> {
    let new_script = Script::parse(content).map_err(EditorError::from)?;

    let delta = match self.cached_script.as_ref() {
        // First parse: there is nothing to compare against.
        None => ScriptDeltaOwned {
            added: new_script
                .sections()
                .iter()
                .map(|section| format!("{section:?}"))
                .collect(),
            modified: Vec::new(),
            removed: Vec::new(),
            new_issues: new_script.issues().to_vec(),
        },
        Some(previous_text) => {
            let old_script = Script::parse(previous_text).map_err(EditorError::from)?;
            let diff = ass_core::parser::calculate_delta(&old_script, &new_script);
            ScriptDeltaOwned {
                added: diff
                    .added
                    .into_iter()
                    .map(|section| format!("{section:?}"))
                    .collect(),
                modified: diff
                    .modified
                    .into_iter()
                    .map(|(idx, section)| (idx, format!("{section:?}")))
                    .collect(),
                removed: diff.removed,
                new_issues: new_script.issues().to_vec(),
            }
        }
    };

    // Adopt the freshly parsed text and start tracking from a clean slate.
    self.bytes_changed = 0;
    self.pending_changes.clear();
    self.cached_script = Some(content.to_string());
    Ok(delta)
}
/// Drops the cached text and resets all change tracking.
///
/// Pending changes are discarded, the byte counter returns to zero and
/// the change-id counter restarts at 1. The reparse threshold is kept.
pub fn clear_cache(&mut self) {
    self.next_change_id = 1;
    self.bytes_changed = 0;
    self.pending_changes.clear();
    self.cached_script = None;
}
/// Returns the changes recorded since the cache was last rebuilt or
/// cleared, in the order they were recorded.
pub fn pending_changes(&self) -> &[DocumentChange<'static>] {
    self.pending_changes.as_slice()
}
/// Reports whether enough has changed that a full reparse is advisable.
///
/// This is the case when the byte counter has reached the reparse
/// threshold or when more than 50 changes are pending.
pub fn should_reparse(&self) -> bool {
    // Number of pending changes that may accumulate before advising a reparse.
    const MAX_PENDING_CHANGES: usize = 50;

    let byte_budget_spent = self.bytes_changed >= self.reparse_threshold;
    let too_many_pending = self.pending_changes.len() > MAX_PENDING_CHANGES;
    byte_budget_spent || too_many_pending
}
/// Replaces the bytes of the cached text covered by `range` with
/// `new_text`.
///
/// Does nothing and returns `Ok(())` when no text is cached.
///
/// # Errors
///
/// * [`EditorError::InvalidRange`] if `range` is reversed or reaches past
///   the end of the cached text.
/// * A command-failed error if either end of `range` is not on a UTF-8
///   character boundary of the cached text.
fn update_cached_script(&mut self, range: Range, new_text: &str) -> Result<()> {
    let cached = match self.cached_script.as_mut() {
        Some(cached) => cached,
        None => return Ok(()),
    };

    let (start, end) = (range.start.offset, range.end.offset);
    // A reversed range must be rejected here as well: it would otherwise
    // underflow the length arithmetic or duplicate part of the text.
    if start > end || end > cached.len() {
        return Err(EditorError::InvalidRange {
            start,
            end,
            length: cached.len(),
        });
    }
    if !cached.is_char_boundary(start) || !cached.is_char_boundary(end) {
        return Err(EditorError::command_failed(
            "Cache update range is not on valid UTF-8 character boundaries",
        ));
    }

    // Both offsets are validated above, so this in-place splice cannot panic.
    cached.replace_range(start..end, new_text);
    Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::core::Position;
#[cfg(not(feature = "std"))]
use alloc::string::ToString;
// A freshly constructed parser has no cache, no pending changes and a zero
// byte counter. The test is valid with and without the `std` feature, so it
// is not gated on either configuration.
#[test]
fn test_incremental_parser_creation() {
    let parser = IncrementalParser::new();
    assert!(parser.cached_script.is_none());
    assert!(parser.pending_changes.is_empty());
    assert_eq!(parser.bytes_changed, 0);
}
// A `DocumentChange` built from borrowed text exposes the values it was
// constructed with.
#[test]
fn test_document_change_tracking() {
    let edited_range = Range::new(Position::new(0), Position::new(5));
    let tracked = DocumentChange {
        range: edited_range,
        new_text: Cow::Borrowed("Hello"),
        old_text: Cow::Borrowed("World"),
        #[cfg(feature = "std")]
        timestamp: std::time::Instant::now(),
        change_id: 1,
    };

    assert_eq!(tracked.change_id, 1);
    assert_eq!(tracked.old_text, "World");
    assert_eq!(tracked.new_text, "Hello");
}
// `should_reparse` flips to true once the byte counter passes the
// configured threshold.
#[test]
fn test_should_reparse_threshold() {
    let mut tracker = IncrementalParser::new();
    tracker.set_reparse_threshold(100);

    // Nothing has been changed yet.
    let before = tracker.should_reparse();
    assert!(!before);

    tracker.bytes_changed = 101;
    let after = tracker.should_reparse();
    assert!(after);
}
// `clear_cache` drops the cached text and resets the byte counter and the
// change-id counter.
#[test]
fn test_clear_cache() {
    let mut tracker = IncrementalParser::new();
    tracker.next_change_id = 5;
    tracker.bytes_changed = 100;
    tracker.cached_script = Some("test".to_string());

    tracker.clear_cache();

    assert_eq!(tracker.next_change_id, 1);
    assert_eq!(tracker.bytes_changed, 0);
    assert!(tracker.cached_script.is_none());
}
// Both full-reparse entry conditions of `apply_change` succeed and leave
// the parser in a usable state.
#[test]
fn test_error_recovery() {
    let content = "[Script Info]\nTitle: Test";
    let edit_range = Range::new(Position::new(0), Position::new(5));
    let mut parser = IncrementalParser::new();

    // No cache exists yet, so the first call goes through a full parse and
    // populates the cache.
    let first = parser.apply_change(content, edit_range, "New");
    assert!(first.is_ok());
    assert!(parser.cached_script.is_some());

    // Push the byte counter past the threshold so the next call reparses
    // fully, which resets the counter.
    parser.set_reparse_threshold(10);
    parser.bytes_changed = 11;
    let second = parser.apply_change(content, edit_range, "Changed");
    assert!(second.is_ok());
    assert_eq!(parser.bytes_changed, 0);
}
}