Skip to main content

fresh/model/buffer/
persistence.rs

1//! Persistence state for a `TextBuffer`.
2//!
3//! Owns the six former flat fields that describe "where this buffer
4//! lives on disk and whether its in-memory state has diverged from
5//! what's on disk": the filesystem handle, the optional on-disk path,
6//! the modified / recovery-pending dirty flags, the saved-root
7//! snapshot of the piece tree at last save, and the on-disk file
8//! size at last save.
9//!
10//! The `mark_dirty` method is the single choke-point for flipping
11//! both dirty flags. `TextBuffer::mark_content_modified` calls it and
12//! then bumps the top-level version counter.
13
14use crate::model::filesystem::FileSystem;
15use crate::model::piece_tree::{BufferLocation, LeafData, PieceTree, PieceTreeNode, StringBuffer};
16use crate::model::piece_tree_diff::PieceTreeDiff;
17use std::path::{Path, PathBuf};
18use std::sync::Arc;
19
20/// Filesystem + save-state for one `TextBuffer`.
21pub struct Persistence {
22    /// Filesystem abstraction for file I/O operations.
23    fs: Arc<dyn FileSystem + Send + Sync>,
24
25    /// Optional file path for persistence.
26    file_path: Option<PathBuf>,
27
28    /// Has the buffer been modified since last save?
29    modified: bool,
30
31    /// Does the buffer have unsaved changes for recovery auto-save?
32    ///
33    /// Separate from `modified` because recovery auto-save doesn't
34    /// clear `modified` (buffer still differs from on-disk file).
35    recovery_pending: bool,
36
37    /// Snapshot of the piece tree root at last save (shared via Arc).
38    saved_root: Arc<PieceTreeNode>,
39
40    /// The file size on disk after the last save.
41    ///
42    /// Used for chunked recovery to know the original file size for
43    /// reconstruction. Updated when loading from file or after
44    /// saving.
45    saved_file_size: Option<usize>,
46}
47
48impl Persistence {
49    pub fn new(
50        fs: Arc<dyn FileSystem + Send + Sync>,
51        file_path: Option<PathBuf>,
52        saved_root: Arc<PieceTreeNode>,
53        saved_file_size: Option<usize>,
54    ) -> Self {
55        Self {
56            fs,
57            file_path,
58            modified: false,
59            recovery_pending: false,
60            saved_root,
61            saved_file_size,
62        }
63    }
64
65    pub fn fs(&self) -> &Arc<dyn FileSystem + Send + Sync> {
66        &self.fs
67    }
68
69    pub fn set_fs(&mut self, fs: Arc<dyn FileSystem + Send + Sync>) {
70        self.fs = fs;
71    }
72
73    pub fn file_path(&self) -> Option<&Path> {
74        self.file_path.as_deref()
75    }
76
77    pub fn file_path_owned(&self) -> Option<PathBuf> {
78        self.file_path.clone()
79    }
80
81    pub fn set_file_path(&mut self, path: PathBuf) {
82        self.file_path = Some(path);
83    }
84
85    pub fn clear_file_path(&mut self) {
86        self.file_path = None;
87    }
88
89    pub fn is_modified(&self) -> bool {
90        self.modified
91    }
92
93    pub fn set_modified(&mut self, modified: bool) {
94        self.modified = modified;
95    }
96
97    pub fn clear_modified(&mut self) {
98        self.modified = false;
99    }
100
101    pub fn is_recovery_pending(&self) -> bool {
102        self.recovery_pending
103    }
104
105    pub fn set_recovery_pending(&mut self, pending: bool) {
106        self.recovery_pending = pending;
107    }
108
109    /// The single choke-point for flipping the two dirty flags.
110    ///
111    /// Called from `TextBuffer::mark_content_modified` after every
112    /// edit. Do **not** call `set_modified`/`set_recovery_pending`
113    /// directly from edit paths — go through the orchestrator on
114    /// `TextBuffer` so the version counter bumps too.
115    pub(super) fn mark_dirty(&mut self) {
116        self.modified = true;
117        self.recovery_pending = true;
118    }
119
120    pub fn saved_root(&self) -> &Arc<PieceTreeNode> {
121        &self.saved_root
122    }
123
124    pub fn set_saved_root(&mut self, root: Arc<PieceTreeNode>) {
125        self.saved_root = root;
126    }
127
128    pub fn saved_file_size(&self) -> Option<usize> {
129        self.saved_file_size
130    }
131
132    pub fn set_saved_file_size(&mut self, size: Option<usize>) {
133        self.saved_file_size = size;
134    }
135
136    // ---------- snapshot / diff operations ----------
137
138    /// Replace the saved snapshot with the current piece tree and clear
139    /// the modified flag.  Call this after a successful save.
140    pub fn mark_saved_snapshot(&mut self, piece_tree: &PieceTree) {
141        self.saved_root = piece_tree.root();
142        self.modified = false;
143    }
144
145    /// Refresh the saved root to match the current tree structure
146    /// without clearing the modified flag.  Call this after
147    /// structural-only changes (e.g. `chunk_split_and_load` during
148    /// search scan) so that `diff_since_saved` can take the fast
149    /// `Arc::ptr_eq` path.
150    pub fn refresh_saved_root_if_unmodified(&mut self, piece_tree: &PieceTree) {
151        if !self.modified {
152            self.saved_root = piece_tree.root();
153        }
154    }
155
156    /// Apply a chunk-load buffer replacement to `saved_root`.
157    ///
158    /// When viewport loading converts a `Stored(buffer_id)` piece to
159    /// `Added(new_buffer_id)` in the current tree and the buffer is
160    /// already modified, we must apply the same transformation to
161    /// `saved_root` so that `diff_since_saved` can match
162    /// loaded-but-unedited regions by `(location, offset)` identity.
163    pub fn apply_chunk_load_to_saved_root(
164        &mut self,
165        old_buffer_id: usize,
166        chunk_offset_in_buffer: usize,
167        chunk_bytes: usize,
168        new_buffer_id: usize,
169    ) {
170        let mut leaves = Vec::new();
171        self.saved_root.collect_leaves(&mut leaves);
172
173        let mut modified = false;
174        let mut new_leaves: Vec<LeafData> = Vec::with_capacity(leaves.len() + 2);
175
176        for leaf in &leaves {
177            if leaf.location.buffer_id() != old_buffer_id {
178                new_leaves.push(*leaf);
179                continue;
180            }
181
182            let leaf_start = leaf.offset;
183            let leaf_end = leaf.offset + leaf.bytes;
184            let chunk_start = chunk_offset_in_buffer;
185            let chunk_end = chunk_offset_in_buffer + chunk_bytes;
186
187            // Check if this leaf overlaps the chunk range
188            if chunk_start >= leaf_end || chunk_end <= leaf_start {
189                // No overlap — keep as-is
190                new_leaves.push(*leaf);
191                continue;
192            }
193
194            modified = true;
195
196            // Prefix: portion of this leaf before the chunk
197            if chunk_start > leaf_start {
198                new_leaves.push(LeafData::new(
199                    leaf.location,
200                    leaf.offset,
201                    chunk_start - leaf_start,
202                    None, // line feed count unknown after split
203                ));
204            }
205
206            // The chunk itself — replaced with Added(new_buffer_id)
207            let actual_start = chunk_start.max(leaf_start);
208            let actual_end = chunk_end.min(leaf_end);
209            let offset_in_chunk = actual_start - chunk_start;
210            new_leaves.push(LeafData::new(
211                BufferLocation::Added(new_buffer_id),
212                offset_in_chunk,
213                actual_end - actual_start,
214                None,
215            ));
216
217            // Suffix: portion of this leaf after the chunk
218            if chunk_end < leaf_end {
219                new_leaves.push(LeafData::new(
220                    leaf.location,
221                    chunk_end,
222                    leaf_end - chunk_end,
223                    None,
224                ));
225            }
226        }
227
228        if modified {
229            self.saved_root = PieceTree::from_leaves(&new_leaves).root();
230        }
231    }
232
233    /// Diff the current piece tree against the last saved snapshot.
234    ///
235    /// Two-phase algorithm:
236    /// - Phase 1: structure-based diff to find changed byte ranges
237    ///   (O(num_leaves)).
238    /// - Phase 2: for small changed regions, compare actual bytes
239    ///   (so paste-after-delete shows as no change).
240    ///
241    /// `piece_tree` + `buffers` are passed by reference — this
242    /// method only reads them.
243    pub fn diff_since_saved(
244        &self,
245        piece_tree: &PieceTree,
246        buffers: &[StringBuffer],
247    ) -> PieceTreeDiff {
248        // Fast path: if the buffer hasn't been modified since loading
249        // or saving, content is identical by definition.
250        if !self.modified {
251            return PieceTreeDiff {
252                equal: true,
253                byte_ranges: Vec::new(),
254                nodes_visited: 0,
255            };
256        }
257
258        // Quick check: Arc::ptr_eq on tree roots.
259        if Arc::ptr_eq(&self.saved_root, &piece_tree.root()) {
260            return PieceTreeDiff {
261                equal: true,
262                byte_ranges: Vec::new(),
263                nodes_visited: 0,
264            };
265        }
266
267        // Phase 1: structure-based diff to find which byte ranges
268        // differ. O(number of leaves).
269        let structure_diff = self.diff_trees_by_structure(piece_tree);
270
271        // If structure says trees are equal, we're done.
272        if structure_diff.equal {
273            return structure_diff;
274        }
275
276        // Phase 2: for small changed regions, verify with actual
277        // content comparison (handles paste-after-delete).
278        let total_changed_bytes: usize = structure_diff
279            .byte_ranges
280            .iter()
281            .map(|r| r.end.saturating_sub(r.start))
282            .sum();
283
284        // Only verify if the changed region is reasonably small.
285        const MAX_VERIFY_BYTES: usize = 64 * 1024;
286
287        if total_changed_bytes <= MAX_VERIFY_BYTES && !structure_diff.byte_ranges.is_empty() {
288            if self.verify_content_differs_in_ranges(
289                &structure_diff.byte_ranges,
290                piece_tree,
291                buffers,
292            ) {
293                return structure_diff;
294            } else {
295                return PieceTreeDiff {
296                    equal: true,
297                    byte_ranges: Vec::new(),
298                    nodes_visited: structure_diff.nodes_visited,
299                };
300            }
301        }
302
303        // Large changes: trust the structure diff.
304        structure_diff
305    }
306
307    /// Structure-based diff comparing piece tree leaves
308    pub fn diff_trees_by_structure(&self, piece_tree: &PieceTree) -> PieceTreeDiff {
309        crate::model::piece_tree_diff::diff_piece_trees(&self.saved_root, &piece_tree.root())
310    }
311
312    /// Check if the actual byte content differs in the given ranges.
313    fn verify_content_differs_in_ranges(
314        &self,
315        byte_ranges: &[std::ops::Range<usize>],
316        piece_tree: &PieceTree,
317        buffers: &[StringBuffer],
318    ) -> bool {
319        let saved_bytes = tree_total_bytes(&self.saved_root);
320        let current_bytes = piece_tree.total_bytes();
321
322        // Different total sizes → content definitely differs.
323        if saved_bytes != current_bytes {
324            return true;
325        }
326
327        for range in byte_ranges {
328            if range.start >= range.end {
329                continue;
330            }
331
332            let saved_slice =
333                extract_range_from_tree(&self.saved_root, range.start, range.end, buffers);
334            let current_slice = get_text_range(piece_tree, buffers, range.start, range.len());
335
336            match (saved_slice, current_slice) {
337                (Some(saved), Some(current)) => {
338                    if saved != current {
339                        return true;
340                    }
341                }
342                _ => {
343                    // Couldn't read content, assume it differs to be safe.
344                    return true;
345                }
346            }
347        }
348
349        false
350    }
351}
352
353// ---------- private free-fn helpers over borrowed storage ----------
354
355/// Total bytes in a tree rooted at `root`.
356fn tree_total_bytes(root: &Arc<PieceTreeNode>) -> usize {
357    match root.as_ref() {
358        PieceTreeNode::Internal {
359            left_bytes, right, ..
360        } => left_bytes + tree_total_bytes(right),
361        PieceTreeNode::Leaf { bytes, .. } => *bytes,
362    }
363}
364
365/// Extract a byte range from an arbitrary tree root.
366fn extract_range_from_tree(
367    root: &Arc<PieceTreeNode>,
368    start: usize,
369    end: usize,
370    buffers: &[StringBuffer],
371) -> Option<Vec<u8>> {
372    let mut result = Vec::with_capacity(end.saturating_sub(start));
373    collect_range_from_node(root, start, end, 0, buffers, &mut result)?;
374    Some(result)
375}
376
377fn collect_range_from_node(
378    node: &Arc<PieceTreeNode>,
379    range_start: usize,
380    range_end: usize,
381    node_offset: usize,
382    buffers: &[StringBuffer],
383    result: &mut Vec<u8>,
384) -> Option<()> {
385    match node.as_ref() {
386        PieceTreeNode::Internal {
387            left_bytes,
388            left,
389            right,
390            ..
391        } => {
392            let left_end = node_offset + left_bytes;
393
394            if range_start < left_end {
395                collect_range_from_node(
396                    left,
397                    range_start,
398                    range_end,
399                    node_offset,
400                    buffers,
401                    result,
402                )?;
403            }
404
405            if range_end > left_end {
406                collect_range_from_node(right, range_start, range_end, left_end, buffers, result)?;
407            }
408        }
409        PieceTreeNode::Leaf {
410            location,
411            offset,
412            bytes,
413            ..
414        } => {
415            let node_end = node_offset + bytes;
416
417            if range_start < node_end && range_end > node_offset {
418                let buf = buffers.get(location.buffer_id())?;
419                let data = buf.get_data()?;
420
421                let leaf_start = range_start.saturating_sub(node_offset);
422                let leaf_end = (range_end - node_offset).min(*bytes);
423
424                if leaf_start < leaf_end {
425                    let slice = data.get(*offset + leaf_start..*offset + leaf_end)?;
426                    result.extend_from_slice(slice);
427                }
428            }
429        }
430    }
431    Some(())
432}
433
434/// Read-only equivalent of `TextBuffer::get_text_range`, duplicated
435/// here so `verify_content_differs_in_ranges` doesn't need access to
436/// `TextBuffer`.  Returns `None` if any required buffer is unloaded.
437fn get_text_range(
438    piece_tree: &PieceTree,
439    buffers: &[StringBuffer],
440    offset: usize,
441    bytes: usize,
442) -> Option<Vec<u8>> {
443    if bytes == 0 {
444        return Some(Vec::new());
445    }
446
447    let mut result = Vec::with_capacity(bytes);
448    let end_offset = offset + bytes;
449    let mut collected = 0;
450
451    for piece_view in piece_tree.iter_pieces_in_range(offset, end_offset) {
452        let buffer_id = piece_view.location.buffer_id();
453        if let Some(buffer) = buffers.get(buffer_id) {
454            let piece_start_in_doc = piece_view.doc_offset;
455            let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;
456
457            let read_start = offset.max(piece_start_in_doc);
458            let read_end = end_offset.min(piece_end_in_doc);
459
460            if read_end > read_start {
461                let offset_in_piece = read_start - piece_start_in_doc;
462                let bytes_to_read = read_end - read_start;
463
464                let buffer_start = piece_view.buffer_offset + offset_in_piece;
465                let buffer_end = buffer_start + bytes_to_read;
466
467                let data = buffer.get_data()?;
468
469                if buffer_end <= data.len() {
470                    result.extend_from_slice(&data[buffer_start..buffer_end]);
471                    collected += bytes_to_read;
472
473                    if collected >= bytes {
474                        break;
475                    }
476                }
477            }
478        }
479    }
480
481    Some(result)
482}