// edit/buffer/gap_buffer.rs
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

use std::ops::Range;
use std::ptr::{self, NonNull};
use std::slice;

use crate::document::{ReadableDocument, WriteableDocument};
use crate::helpers::*;
use crate::{apperr, sys};

// Sizing for the virtual-memory backed ("large") buffer: a big address range
// is reserved up front and pages are committed on demand, so the reservation
// itself costs no physical memory. The reservation is smaller on 32-bit
// targets where address space is scarce.
#[cfg(target_pointer_width = "32")]
const LARGE_CAPACITY: usize = 128 * MEBI;
#[cfg(target_pointer_width = "64")]
const LARGE_CAPACITY: usize = 4 * GIBI;
// Granularity at which additional pages get committed (see `enlarge_gap`).
const LARGE_ALLOC_CHUNK: usize = 64 * KIBI;
// Granularity to which a requested gap size gets rounded up (see `enlarge_gap`).
const LARGE_GAP_CHUNK: usize = 4 * KIBI;

// Sizing for the `Vec`-backed ("small") buffer, which uses the regular heap
// and therefore gets much smaller capacity and growth granularity.
const SMALL_CAPACITY: usize = 128 * KIBI;
const SMALL_ALLOC_CHUNK: usize = 256;
const SMALL_GAP_CHUNK: usize = 16;

// TODO: Instead of having a specialization for small buffers here,
// tui.rs could also just keep a MRU set of large buffers around.
/// The memory backing a `GapBuffer`.
enum BackingBuffer {
    /// A reserved range of virtual address space: base pointer and reserved
    /// byte count. Pages are committed on demand and released on drop.
    VirtualMemory(NonNull<u8>, usize),
    /// A plain heap allocation for buffers that hold only small amounts of text.
    Vec(Vec<u8>),
}

30impl Drop for BackingBuffer {
31    fn drop(&mut self) {
32        unsafe {
33            if let Self::VirtualMemory(ptr, reserve) = *self {
34                sys::virtual_release(ptr, reserve);
35            }
36        }
37    }
38}
39
/// Most people know how `Vec<T>` works: It has some spare capacity at the end,
/// so that pushing into it doesn't reallocate every single time. A gap buffer
/// is the same thing, but the spare capacity can be anywhere in the buffer.
/// This variant is optimized for large buffers and uses virtual memory.
pub struct GapBuffer {
    /// Pointer to the buffer. For `BackingBuffer::Vec` this aliases the Vec's
    /// storage (refreshed after each resize in `enlarge_gap`) and is dangling
    /// until the first allocation.
    text: NonNull<u8>,
    /// Maximum size of the buffer, including gap.
    reserve: usize,
    /// Size of the buffer, including gap.
    /// Invariant (maintained by `enlarge_gap`):
    /// `text_length + gap_len <= commit <= reserve`.
    commit: usize,
    /// Length of the stored text, NOT including gap.
    text_length: usize,
    /// Gap offset: text occupies `[0, gap_off)` and
    /// `[gap_off + gap_len, gap_off + gap_len + (text_length - gap_off))`.
    gap_off: usize,
    /// Gap length.
    gap_len: usize,
    /// Increments every time the buffer is modified. Uses wrapping arithmetic,
    /// so it's only meaningful for equality comparisons.
    generation: u32,
    /// If `Vec(..)`, the buffer is optimized for small amounts of text
    /// and uses the standard heap. Otherwise, it uses virtual memory.
    buffer: BackingBuffer,
}

impl GapBuffer {
    /// Creates an empty gap buffer.
    ///
    /// If `small` is true, the buffer is backed by a regular `Vec` and capped
    /// at `SMALL_CAPACITY`. Otherwise, `LARGE_CAPACITY` bytes of virtual
    /// address space are reserved up front and committed on demand.
    ///
    /// Returns an error only if the virtual address space reservation fails.
    pub fn new(small: bool) -> apperr::Result<Self> {
        let reserve;
        let buffer;
        let text;

        if small {
            reserve = SMALL_CAPACITY;
            // The Vec hasn't allocated yet; `text` gets re-pointed at the
            // Vec's storage in `enlarge_gap()` on the first resize.
            text = NonNull::dangling();
            buffer = BackingBuffer::Vec(Vec::new());
        } else {
            reserve = LARGE_CAPACITY;
            text = unsafe { sys::virtual_reserve(reserve)? };
            buffer = BackingBuffer::VirtualMemory(text, reserve);
        }

        Ok(Self {
            text,
            reserve,
            commit: 0,
            text_length: 0,
            gap_off: 0,
            gap_len: 0,
            generation: 0,
            buffer,
        })
    }

    /// Length of the stored text in bytes, NOT including the gap.
    #[allow(clippy::len_without_is_empty)]
    pub fn len(&self) -> usize {
        self.text_length
    }

    /// Returns the current modification counter.
    pub fn generation(&self) -> u32 {
        self.generation
    }

    /// Overrides the modification counter.
    pub fn set_generation(&mut self, generation: u32) {
        self.generation = generation;
    }

    /// Moves the gap to `off`, deletes the `delete` bytes that follow it, and
    /// grows the gap to at least `len` bytes (best effort). The caller writes
    /// into the returned slice and then calls `commit_gap` with the number of
    /// bytes actually written. `off` and `delete` are clamped to the text length.
    ///
    /// WARNING: The returned slice must not necessarily be the same length as `len` (due to OOM).
    pub fn allocate_gap(&mut self, off: usize, len: usize, delete: usize) -> &mut [u8] {
        // Sanitize parameters
        let off = off.min(self.text_length);
        let delete = delete.min(self.text_length - off);

        // Move the existing gap if it exists
        if off != self.gap_off {
            self.move_gap(off);
        }

        // Delete the text
        if delete > 0 {
            self.delete_text(delete);
        }

        // Enlarge the gap if needed
        if len > self.gap_len {
            self.enlarge_gap(len);
        }

        self.generation = self.generation.wrapping_add(1);
        unsafe { slice::from_raw_parts_mut(self.text.add(self.gap_off).as_ptr(), self.gap_len) }
    }

    /// Moves the gap so that it starts at text offset `off`, shifting the
    /// intervening text across the gap.
    fn move_gap(&mut self, off: usize) {
        if self.gap_len > 0 {
            //
            //                       v gap_off
            // left:  |ABCDEFGHIJKLMN   OPQRSTUVWXYZ|
            //        |ABCDEFGHI   JKLMNOPQRSTUVWXYZ|
            //                  ^ off
            //        move: JKLMN
            //
            //                       v gap_off
            // !left: |ABCDEFGHIJKLMN   OPQRSTUVWXYZ|
            //        |ABCDEFGHIJKLMNOPQRS   TUVWXYZ|
            //                            ^ off
            //        move: OPQRS
            //
            let left = off < self.gap_off;
            let move_src = if left { off } else { self.gap_off + self.gap_len };
            let move_dst = if left { off + self.gap_len } else { self.gap_off };
            let move_len = if left { self.gap_off - off } else { off - self.gap_off };

            // NOTE: `copy_to` is a memmove; src and dst may overlap.
            unsafe { self.text.add(move_src).copy_to(self.text.add(move_dst), move_len) };

            if cfg!(debug_assertions) {
                // Fill the moved-out bytes with 0xCD to make debugging easier.
                unsafe { self.text.add(off).write_bytes(0xCD, self.gap_len) };
            }
        }

        self.gap_off = off;
    }

    /// Deletes `delete` bytes immediately following the gap simply by
    /// absorbing them into the gap. The caller must have clamped `delete`.
    fn delete_text(&mut self, delete: usize) {
        if cfg!(debug_assertions) {
            // Fill the deleted bytes with 0xCD to make debugging easier.
            unsafe { self.text.add(self.gap_off + self.gap_len).write_bytes(0xCD, delete) };
        }

        self.gap_len += delete;
        self.text_length -= delete;
    }

    /// Grows the gap to at least `len` bytes, committing / reallocating
    /// backing memory as needed. On OOM, or if the request would exceed
    /// `self.reserve`, this returns early and leaves the gap at its current
    /// size — hence the WARNING on `allocate_gap`.
    fn enlarge_gap(&mut self, len: usize) {
        let gap_chunk;
        let alloc_chunk;

        if matches!(self.buffer, BackingBuffer::VirtualMemory(..)) {
            gap_chunk = LARGE_GAP_CHUNK;
            alloc_chunk = LARGE_ALLOC_CHUNK;
        } else {
            gap_chunk = SMALL_GAP_CHUNK;
            alloc_chunk = SMALL_ALLOC_CHUNK;
        }

        let gap_len_old = self.gap_len;
        // Add one chunk of slack, then round up to a chunk boundary
        // (gap_chunk is a power of two).
        let gap_len_new = (len + gap_chunk + gap_chunk - 1) & !(gap_chunk - 1);

        let bytes_old = self.commit;
        let bytes_new = self.text_length + gap_len_new;

        if bytes_new > bytes_old {
            // Round the commit size up to the allocation granularity.
            let bytes_new = (bytes_new + alloc_chunk - 1) & !(alloc_chunk - 1);

            if bytes_new > self.reserve {
                return;
            }

            match &mut self.buffer {
                BackingBuffer::VirtualMemory(ptr, _) => unsafe {
                    if sys::virtual_commit(ptr.add(bytes_old), bytes_new - bytes_old).is_err() {
                        return;
                    }
                },
                BackingBuffer::Vec(v) => {
                    v.resize(bytes_new, 0);
                    // The Vec may have reallocated; refresh the base pointer.
                    self.text = unsafe { NonNull::new_unchecked(v.as_mut_ptr()) };
                }
            }

            self.commit = bytes_new;
        }

        // Shift the text that follows the gap to the end of the enlarged gap.
        // `ptr::copy` is a memmove; the two ranges may overlap.
        let gap_beg = unsafe { self.text.add(self.gap_off) };
        unsafe {
            ptr::copy(
                gap_beg.add(gap_len_old).as_ptr(),
                gap_beg.add(gap_len_new).as_ptr(),
                self.text_length - self.gap_off,
            )
        };

        if cfg!(debug_assertions) {
            // Fill the moved-out bytes with 0xCD to make debugging easier.
            unsafe { gap_beg.add(gap_len_old).write_bytes(0xCD, gap_len_new - gap_len_old) };
        }

        self.gap_len = gap_len_new;
    }

    /// Marks the first `len` bytes of the gap as text, after the caller has
    /// filled them via `allocate_gap`. Panics if `len` exceeds the gap length.
    pub fn commit_gap(&mut self, len: usize) {
        assert!(len <= self.gap_len);
        self.text_length += len;
        self.gap_off += len;
        self.gap_len -= len;
    }

    /// Replaces the text in `range` with `src`. Out-of-bounds and inverted
    /// ranges are clamped. If the gap couldn't be fully enlarged (OOM),
    /// the copy is silently truncated.
    pub fn replace(&mut self, range: Range<usize>, src: &[u8]) {
        let gap = self.allocate_gap(range.start, src.len(), range.end.saturating_sub(range.start));
        // NOTE(review): `slice_copy_safe` comes from `crate::helpers`; it
        // presumably copies as much as fits and returns that length — confirm.
        let len = slice_copy_safe(gap, src);
        self.commit_gap(len);
    }

    /// Deletes all text. The committed memory is kept for reuse:
    /// every previously stored byte simply becomes part of the gap.
    pub fn clear(&mut self) {
        self.gap_off = 0;
        self.gap_len += self.text_length;
        self.generation = self.generation.wrapping_add(1);
        self.text_length = 0;
    }

    /// Copies the bytes in `range` (clamped to the text length) into `out`,
    /// inserting them at `out_off` (clamped to `out.len()`).
    pub fn extract_raw(&self, range: Range<usize>, out: &mut Vec<u8>, mut out_off: usize) {
        let end = range.end.min(self.text_length);
        let mut beg = range.start.min(end);
        out_off = out_off.min(out.len());

        if beg >= end {
            return;
        }

        out.reserve(end - beg);

        // The requested range may straddle the gap, so copy chunk by chunk.
        while beg < end {
            let chunk = self.read_forward(beg);
            let chunk = &chunk[..chunk.len().min(end - beg)];
            // NOTE(review): `replace_range` on `Vec` appears to be an
            // extension from `crate::helpers` that inserts `chunk` at
            // `out_off` — confirm.
            out.replace_range(out_off..out_off, chunk);
            beg += chunk.len();
            out_off += chunk.len();
        }
    }

    /// Replaces the entire buffer contents with the given `text`.
    /// The method is optimized for the case where the given `text` already matches
    /// the existing contents. Returns `true` if the buffer contents were changed.
    pub fn copy_from(&mut self, src: &dyn ReadableDocument) -> bool {
        let mut off = 0;

        // Find the position at which the contents change.
        loop {
            let dst_chunk = self.read_forward(off);
            let src_chunk = src.read_forward(off);

            let dst_len = dst_chunk.len();
            let src_len = src_chunk.len();
            let len = dst_len.min(src_len);
            let mismatch = dst_chunk[..len] != src_chunk[..len];

            if mismatch {
                break; // The contents differ.
            }
            if len == 0 {
                if dst_len == src_len {
                    return false; // Both done simultaneously. -> Done.
                }
                break; // One of the two is shorter.
            }

            off += len;
        }

        // Update the buffer starting at `off`. The first `replace` deletes
        // everything from `off` onward and inserts the first source chunk;
        // subsequent iterations append further chunks.
        loop {
            let chunk = src.read_forward(off);
            self.replace(off..usize::MAX, chunk);
            off += chunk.len();

            // No more data to copy -> Done. By checking this _after_ the replace()
            // call, we ensure that the initial `off..usize::MAX` range is deleted.
            // This fixes going from some buffer contents to being empty.
            if chunk.is_empty() {
                return true;
            }
        }
    }

    /// Copies the contents of the buffer into the given document.
    pub fn copy_into(&self, dst: &mut dyn WriteableDocument) {
        let mut beg = 0;
        let mut off = 0;

        // do-while loop: the body runs at least once,
        // so an empty buffer still clears `dst`.
        while {
            let chunk = self.read_forward(off);

            // The first write will be 0..usize::MAX and effectively clear() the destination.
            // Every subsequent write will be usize::MAX..usize::MAX and thus effectively append().
            dst.replace(beg..usize::MAX, chunk);
            beg = usize::MAX;

            off += chunk.len();
            off < self.text_length
        } {}
    }
}

331impl ReadableDocument for GapBuffer {
332    fn read_forward(&self, off: usize) -> &[u8] {
333        let off = off.min(self.text_length);
334        let beg;
335        let len;
336
337        if off < self.gap_off {
338            // Cursor is before the gap: We can read until the start of the gap.
339            beg = off;
340            len = self.gap_off - off;
341        } else {
342            // Cursor is after the gap: We can read until the end of the buffer.
343            beg = off + self.gap_len;
344            len = self.text_length - off;
345        }
346
347        unsafe { slice::from_raw_parts(self.text.add(beg).as_ptr(), len) }
348    }
349
350    fn read_backward(&self, off: usize) -> &[u8] {
351        let off = off.min(self.text_length);
352        let beg;
353        let len;
354
355        if off <= self.gap_off {
356            // Cursor is before the gap: We can read until the beginning of the buffer.
357            beg = 0;
358            len = off;
359        } else {
360            // Cursor is after the gap: We can read until the end of the gap.
361            beg = self.gap_off + self.gap_len;
362            // The cursor_off doesn't account of the gap_len.
363            // (This allows us to move the gap without recalculating the cursor position.)
364            len = off - self.gap_off;
365        }
366
367        unsafe { slice::from_raw_parts(self.text.add(beg).as_ptr(), len) }
368    }
369}