1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
//! Buffer type's underlying data structure.

use super::Position;
use super::Range;
use std::borrow::Borrow;

/// A UTF-8 string buffer designed to minimize reallocations,
/// maintaining performance amid frequent modifications.
pub struct GapBuffer {
    data: Vec<u8>,
    gap_start: usize,
    gap_length: usize,
}

impl GapBuffer {
    /// Initializes a gap buffer with the specified data as its contents.
    ///
    /// # Examples
    ///
    /// ```
    /// use scribe::buffer::GapBuffer;
    ///
    /// let buffer = GapBuffer::new("scribe".to_string());
    /// assert_eq!(buffer.to_string(), "scribe");
    /// ```
    pub fn new(data: String) -> GapBuffer {
        let mut bytes = data.into_bytes();
        let capacity = bytes.capacity();
        let gap_start = bytes.len();
        let gap_length = capacity - gap_start;
        unsafe {
            bytes.set_len(capacity);
        }
        GapBuffer{ data: bytes, gap_start: gap_start, gap_length: gap_length }
    }

    /// Inserts the specified data into the buffer at the specified position.
    /// The buffer will reallocate if there is insufficient space. If the
    /// position is out of bounds, the buffer contents will remain unchanged.
    ///
    /// # Examples
    ///
    /// ```
    /// use scribe::buffer::GapBuffer;
    ///
    /// let mut buffer = GapBuffer::new("my buffer data".to_string());
    /// buffer.insert(" changed", &scribe::buffer::Position{ line: 0, offset: 2});
    /// assert_eq!("my changed buffer data", buffer.to_string());
    /// ```
    pub fn insert(&mut self, data: &str, position: &Position) {
        // Ensure we have the capacity to insert this data.
        if data.len() > self.gap_length {
            // We're about to add space to the end of the buffer, so move the gap
            // there beforehand so that we're essentially just increasing the
            // gap size, and preventing a split/two-segment gap.
            let offset = self.data.capacity();
            self.move_gap(offset);

            // Re-allocate the gap buffer, increasing its size.
            self.data.reserve(data.len());

            // Update the tracked gap size and tell the vector that
            // we're using all of the new space immediately.
            let capacity = self.data.capacity();
            self.gap_length = capacity - self.gap_start;
            unsafe {
                self.data.set_len(capacity);
            }
        }

        let offset = match self.find_offset(position) {
            Some(o) => o,
            None => return,
        };

        self.move_gap(offset);
        self.write_to_gap(data);
    }

    /// Returns the specified range of data from the buffer.
    /// If any part of the range does not exist, a none value will be returned.
    ///
    /// # Examples
    ///
    /// ```
    /// use scribe::buffer::{GapBuffer, Range};
    ///
    /// let buffer = GapBuffer::new("my data".to_string());
    /// let range = Range::new(
    ///   scribe::buffer::Position{ line: 0, offset: 3 },
    ///   scribe::buffer::Position{ line: 0, offset: 7}
    /// );
    ///
    /// assert_eq!(buffer.read(&range).unwrap(), "data");
    /// ```
    pub fn read(&self, range: &Range) -> Option<String> {
        // Map positions to offsets in the buffer.
        let start_offset = match self.find_offset(&range.start()) {
            Some(offset) => offset,
            None => return None,
        };
        let end_offset = match self.find_offset(&range.end()) {
            Some(offset) => offset,
            None => return None,
        };

        let data = if start_offset < self.gap_start && self.gap_start < end_offset {
            // The gap is in the middle of the range being requested.
            // Stitch the surrounding halves together to exclude it.
            let first_half = &self.data[start_offset..self.gap_start];
            let second_half = &self.data[self.gap_start+self.gap_length..end_offset];

            // Allocate a string for the first half.
            let mut data = String::from_utf8_lossy(first_half).into_owned();

            // Push the second half onto the first.
            data.push_str(String::from_utf8_lossy(second_half).borrow());

            data
        } else {
            // No gap in the way; just return the requested data range.
            String::from_utf8_lossy(&self.data[start_offset..end_offset]).into_owned()
        };

        Some(data)
    }

    /// Returns a string representation of the buffer data (without gap).
    ///
    /// # Examples
    ///
    /// ```
    /// use scribe::buffer::GapBuffer;
    ///
    /// let mut buffer = GapBuffer::new("my data".to_string());
    /// assert_eq!(buffer.to_string(), "my data");
    /// ```
    pub fn to_string(&self) -> String {
        String::from_utf8_lossy(&self.data[..self.gap_start]).to_string() +
        &*String::from_utf8_lossy(&self.data[self.gap_start+self.gap_length..])
    }

    /// Removes the specified range of data from the buffer.
    ///
    /// # Examples
    ///
    /// ```
    /// use scribe::buffer::{GapBuffer, Range};
    ///
    /// let mut buffer = GapBuffer::new("my data".to_string());
    /// let range = Range::new(
    ///   scribe::buffer::Position{ line: 0, offset: 0 },
    ///   scribe::buffer::Position{ line: 0, offset: 3}
    /// );
    ///
    /// buffer.delete(&range);
    /// assert_eq!(buffer.to_string(), "data");
    /// ```
    pub fn delete(&mut self, range: &Range) {
        let start_offset = match self.find_offset(&range.start()) {
            Some(o) => o,
            None => return,
        };
        self.move_gap(start_offset);

        match self.find_offset(&range.end()) {
            Some(offset) => {
                // Widen the gap to cover the deleted contents.
                self.gap_length = offset - self.gap_start;
            },
            None => {
                // The end of the range doesn't exist; check
                // if it's on the last line in the file.
                let start_of_next_line = Position{ line: range.end().line + 1, offset: 0 };

                match self.find_offset(&start_of_next_line) {
                    Some(offset) => {
                        // There are other lines below this range.
                        // Just remove up until the end of the line.
                        self.gap_length = offset - self.gap_start;
                    },
                    None => {
                        // We're on the last line, just get rid of the rest
                        // by extending the gap right to the end of the buffer.
                        self.gap_length = self.data.len() - self.gap_start;
                    }
                }
            }
        };
    }

    /// Checks whether or not the specified position is in bounds of the buffer data.
    ///
    /// # Examples
    ///
    /// ```
    /// use scribe::buffer::GapBuffer;
    ///
    /// let buffer = GapBuffer::new("scribe".to_string());
    /// let in_bounds = scribe::buffer::Position{ line: 0, offset: 0 };
    /// let out_of_bounds = scribe::buffer::Position{ line: 1, offset: 3 };
    ///
    /// assert_eq!(buffer.in_bounds(&in_bounds), true);
    /// assert_eq!(buffer.in_bounds(&out_of_bounds), false);
    /// ```
    pub fn in_bounds(&self, position: &Position) -> bool {
        self.find_offset(position) != None
    }

    // Maps a position to its offset equivalent in the data.
    fn find_offset(&self, position: &Position) -> Option<usize> {
        let first_half = String::from_utf8_lossy(&self.data[..self.gap_start]).to_owned();
        let mut line = 0;
        let mut line_offset = 0;

        for char_index in first_half.char_indices() {
            let (offset, character) = char_index;

            // Check to see if we've found the position yet.
            if line == position.line && line_offset == position.offset {
                return Some(offset);
            }

            // Advance the line and offset characters.
            if character == '\n' {
                line+=1;
                line_offset = 0;
            } else {
                line_offset+=1;
            }
        }

        // We didn't find the position *within* the first half, but it could
        // be right after it, which means it's right at the start of the gap.
        if line == position.line && line_offset == position.offset {
            return Some(self.gap_start+self.gap_length);
        }

        // We haven't reached the position yet, so we'll move on to the other half.
        let second_half = String::from_utf8_lossy(&self.data[self.gap_start+self.gap_length..]).to_owned();
        for char_index in second_half.char_indices() {
            let (offset, character) = char_index;

            // Check to see if we've found the position yet.
            if line == position.line && line_offset == position.offset {
                return Some(self.gap_start + self.gap_length + offset);
            }

            // Advance the line and offset characters.
            if character == '\n' {
                line+=1;
                line_offset = 0;
            } else {
                line_offset+=1;
            }
        }

        // We didn't find the position *within* the second half, but it could
        // be right after it, which means it's at the end of the buffer.
        if line == position.line && line_offset == position.offset {
            return Some(self.data.len());
        }

        None
    }

    fn move_gap(&mut self, offset: usize) {
        // We don't need to move any data if the buffer is at capacity.
        if self.gap_length == 0 {
            self.gap_start = offset;
            return;
        }

        if offset < self.gap_start {
            // Shift the gap to the left one byte at a time.
            for index in (offset..self.gap_start).rev() {
                self.data[index + self.gap_length] = self.data[index];
                self.data[index] = 0;
            }

            self.gap_start = offset;
        } else if offset > self.gap_start {
            // Shift the gap to the right one byte at a time.
            for index in self.gap_start + self.gap_length..offset {
                self.data[index-self.gap_length] = self.data[index];
                self.data[index] = 0;
            }

            // Because the offset was after the gap, its value included the
            // gap length. We must remove it to determine the starting point.
            self.gap_start = offset - self.gap_length;
        }
    }

    fn write_to_gap(&mut self, data: &str) {
        for byte in data.bytes() {
            self.data[self.gap_start] = byte;
            self.gap_start+=1;
            self.gap_length-=1;
        }
    }
}

#[cfg(test)]
mod tests {
    use buffer::{GapBuffer, Position, Range};

    #[test]
    fn move_gap_works() {
        let mut gb = GapBuffer::new("This is a test.".to_string());
        gb.move_gap(0);
        assert_eq!(gb.to_string(), "This is a test.");
    }

    #[test]
    fn inserting_at_the_start_works() {
        let mut gb = GapBuffer::new("toolkit".to_string());

        // This insert serves to move the gap to the start of the buffer.
        gb.insert(" ", &Position { line: 0, offset: 0 });
        assert_eq!(gb.to_string(), " toolkit");

        // We insert enough data (more than twice original capacity) to force
        // a re-allocation, which, with the gap at the start and when handled
        // incorrectly, will create a split/two-segment gap. Bad news.
        gb.insert("scribe text", &Position { line: 0, offset: 0 });
        assert_eq!(gb.to_string(), "scribe text toolkit");
    }

    #[test]
    fn inserting_in_the_middle_works() {
        let mut gb = GapBuffer::new("    editor".to_string());

        // Same deal as above "at the start" test, where we want to move
        // the gap into the middle and then force a reallocation to check
        // that pre-allocation gap shifting is working correctly.
        gb.insert(" ", &Position { line: 0, offset: 4 });
        gb.insert("scribe", &Position { line: 0, offset: 4 });
        assert_eq!(gb.to_string(), "    scribe editor");
    }

    #[test]
    fn inserting_at_the_end_works() {
        let mut gb = GapBuffer::new("This is a test.".to_string());
        gb.insert(" Seriously.", &Position { line: 0, offset: 15 });
        assert_eq!(gb.to_string(), "This is a test. Seriously.");
    }

    #[test]
    fn inserting_in_different_spots_twice_works() {
        let mut gb = GapBuffer::new("This is a test.".to_string());
        gb.insert("Hi. ", &Position { line: 0, offset: 0 });
        gb.insert(" Thank you.", &Position { line: 0, offset: 19 });
        assert_eq!(gb.to_string(), "Hi. This is a test. Thank you.");
    }

    #[test]
    fn inserting_at_an_invalid_position_does_nothing() {
        let mut gb = GapBuffer::new("This is a test.".to_string());
        gb.insert(" Seriously.", &Position { line: 0, offset: 35 });
        assert_eq!(gb.to_string(), "This is a test.");
    }

    #[test]
    fn deleting_works() {
        let mut gb = GapBuffer::new("This is a test.\nSee what happens.".to_string());
        let start = Position{ line: 0, offset: 8 };
        let end = Position{ line: 1, offset: 4 };
        gb.delete(&Range::new(start, end));
        assert_eq!(gb.to_string(), "This is what happens.");
    }

    #[test]
    fn inserting_then_deleting_at_the_start_works() {
        let mut gb = GapBuffer::new(String::new());
        gb.insert("This is a test.", &Position{ line: 0, offset: 0});
        let start = Position{ line: 0, offset: 0 };
        let end = Position{ line: 0, offset: 1 };
        gb.delete(&Range::new(start, end));
        assert_eq!(gb.to_string(), "his is a test.");
    }

    #[test]
    fn deleting_immediately_after_the_end_of_the_gap_widens_the_gap() {
        let mut gb = GapBuffer::new("This is a test.".to_string());
        let mut start = Position{ line: 0, offset: 8 };
        let mut end = Position{ line: 0, offset: 9 };
        gb.delete(&Range::new(start, end));
        assert_eq!(gb.to_string(), "This is  test.");
        assert_eq!(gb.gap_length, 1);

        start = Position{ line: 0, offset: 9 };
        end = Position{ line: 0, offset: 10 };
        gb.delete(&Range::new(start, end));
        assert_eq!(gb.to_string(), "This is  est.");
        assert_eq!(gb.gap_length, 2);
    }

    #[test]
    fn deleting_to_an_out_of_range_line_deletes_to_the_end_of_the_buffer() {
        let mut gb = GapBuffer::new("scribe\nlibrary".to_string());
        let start = Position{ line: 0, offset: 6 };
        let end = Position{ line: 2, offset: 10 };
        gb.delete(&Range::new(start, end));
        assert_eq!(gb.to_string(), "scribe");
    }

    #[test]
    fn deleting_to_an_out_of_range_column_deletes_to_the_end_of_the_buffer() {
        let mut gb = GapBuffer::new("scribe\nlibrary".to_string());
        let start = Position{ line: 0, offset: 0 };
        let end = Position{ line: 0, offset: 100 };
        gb.delete(&Range::new(start, end));
        assert_eq!(gb.to_string(), "library");
    }

    #[test]
    fn read_does_not_include_gap_contents_when_gap_is_at_start_of_range() {
        // Create a buffer and a range that captures the first character.
        let mut gb = GapBuffer::new("scribe".to_string());
        let range = Range::new(
            Position{ line: 0, offset: 0 },
            Position{ line: 0, offset: 1 }
        );

        // Delete the first character, which will move the gap buffer to the start.
        gb.delete(&range);
        assert_eq!(gb.to_string(), "cribe");

        // Ask for the first character, which would include the deleted
        // value, if the read function isn't smart enough to skip it.
        assert_eq!(gb.read(&range).unwrap(), "c");
    }

    #[test]
    fn read_does_not_include_gap_contents_when_gap_is_in_middle_of_range() {
        let mut gb = GapBuffer::new("scribe".to_string());

        // Delete data from the middle of the buffer, which will move the gap there.
        gb.delete(&Range::new(
            Position{ line: 0, offset: 2 },
            Position{ line: 0, offset: 4 }
        ));
        assert_eq!(gb.to_string(), "scbe");

        // Request a range that extends from the start to the finish.
        let range = Range::new(
            Position{ line: 0, offset: 0 },
            Position{ line: 0, offset: 4 }
        );
        assert_eq!(gb.read(&range).unwrap(), "scbe");
    }
}