edit/buffer/
mod.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! A text buffer for a text editor.
5//!
6//! Implements a Unicode-aware, layout-aware text buffer for terminals.
7//! It's based on a gap buffer. It has no line cache and instead relies
8//! on the performance of the ucd module for fast text navigation.
9//!
10//! ---
11//!
12//! If the project ever outgrows a basic gap buffer (e.g. to add time travel)
13//! an ideal, alternative architecture would be a piece table with immutable trees.
14//! The tree nodes can be allocated on the same arena allocator as the added chunks,
15//! making lifetime management fairly easy. The algorithm is described here:
16//! * <https://cdacamar.github.io/data%20structures/algorithms/benchmarking/text%20editors/c++/editor-data-structures/>
17//! * <https://github.com/cdacamar/fredbuf>
18//!
19//! The downside is that text navigation & search takes a performance hit due to small chunks.
20//! The solution to the former is to keep line caches, which further complicates the architecture.
21//! There's no solution for the latter. However, there's a chance that the performance will still be sufficient.
22
23mod gap_buffer;
24mod navigation;
25
26use std::borrow::Cow;
27use std::cell::UnsafeCell;
28use std::collections::LinkedList;
29use std::fmt::Write as _;
30use std::fs::File;
31use std::io::{Read as _, Write as _};
32use std::mem::{self, MaybeUninit};
33use std::ops::Range;
34use std::rc::Rc;
35use std::str;
36
37pub use gap_buffer::GapBuffer;
38
39use crate::arena::{ArenaString, scratch_arena};
40use crate::cell::SemiRefCell;
41use crate::clipboard::Clipboard;
42use crate::document::{ReadableDocument, WriteableDocument};
43use crate::framebuffer::{Framebuffer, IndexedColor};
44use crate::helpers::*;
45use crate::oklab::oklab_blend;
46use crate::simd::memchr2;
47use crate::unicode::{self, Cursor, MeasurementConfig};
48use crate::{apperr, icu, simd};
49
50/// The margin template is used for line numbers.
51/// The max. line number we should ever expect is probably 64-bit,
52/// and so this template fits 19 digits, followed by " │ ".
53const MARGIN_TEMPLATE: &str = "                    │ ";
54/// Just a bunch of whitespace you can use for turning tabs into spaces.
55/// Happens to reuse MARGIN_TEMPLATE, because it has sufficient whitespace.
56const TAB_WHITESPACE: &str = MARGIN_TEMPLATE;
57
/// Stores statistics about the whole document.
#[derive(Copy, Clone)]
pub struct TextBufferStatistics {
    /// Number of logical lines, that is, lines separated by newlines.
    logical_lines: CoordType,
    /// Number of visual lines, that is, the number of lines after layout.
    visual_lines: CoordType,
}
64
/// Stores the active text selection anchors.
///
/// The two points are not sorted. Instead, `beg` refers to where the selection
/// was started and `end` refers to the position that is currently being updated.
#[derive(Copy, Clone)]
struct TextBufferSelection {
    /// Where the selection was started. Not necessarily before `end`.
    beg: Point,
    /// The end of the selection that is currently being updated.
    end: Point,
}
74
/// In order to group actions into a single undo step,
/// we need to know the type of action that was performed.
/// This stores the action type.
#[derive(Copy, Clone, Eq, PartialEq)]
enum HistoryType {
    /// Initial value of [`TextBuffer::last_history_type`], and the value
    /// it is reset to whenever the buffer contents are swapped out.
    Other,
    /// Presumably: text was written to the buffer. (Confirm at the use sites.)
    Write,
    /// Presumably: text was deleted from the buffer. (Confirm at the use sites.)
    Delete,
}
84
/// An undo/redo entry.
///
/// Entries are kept in [`TextBuffer::undo_stack`] and [`TextBuffer::redo_stack`].
/// Each one stores the state from before the change together with
/// the text that was removed and the text that was inserted.
struct HistoryEntry {
    /// [`TextBuffer::cursor`] position before the change was made.
    cursor_before: Point,
    /// [`TextBuffer::selection`] before the change was made.
    selection_before: Option<TextBufferSelection>,
    /// [`TextBuffer::stats`] before the change was made.
    stats_before: TextBufferStatistics,
    /// [`GapBuffer::generation`] before the change was made.
    generation_before: u32,
    /// Logical cursor position where the change took place.
    /// The position is at the start of the changed range.
    cursor: Point,
    /// Text that was deleted from the buffer.
    deleted: Vec<u8>,
    /// Text that was added to the buffer.
    added: Vec<u8>,
}
103
/// Caches an ICU search operation.
///
/// The two generation counters are used to detect
/// when the cached state has become stale.
struct ActiveSearch {
    /// The search pattern.
    pattern: String,
    /// The search options.
    options: SearchOptions,
    /// The ICU `UText` object.
    text: icu::Text,
    /// The ICU `URegularExpression` object.
    regex: icu::Regex,
    /// [`GapBuffer::generation`] when the search was created.
    /// This is used to detect if we need to refresh the
    /// [`ActiveSearch::regex`] object.
    buffer_generation: u32,
    /// [`TextBuffer::selection_generation`] when the search was
    /// created. When the user manually selects text, we need to
    /// refresh the [`ActiveSearch::pattern`] with it.
    selection_generation: u32,
    /// Stores the text buffer offset in between searches.
    next_search_offset: usize,
    /// If we know there were no hits, we can skip searching.
    no_matches: bool,
}
127
/// Options for a search operation.
///
/// All options are `false` in the [`Default`] value.
#[derive(Default, Clone, Copy, Eq, PartialEq)]
pub struct SearchOptions {
    /// If true, the search is case-sensitive.
    pub match_case: bool,
    /// If true, the search matches whole words.
    pub whole_word: bool,
    /// If true, the search uses regex.
    pub use_regex: bool,
}
138
/// Caches the start and length of the active edit line for a single edit.
/// This helps us avoid having to remeasure the buffer after an edit.
///
/// Stored in [`TextBuffer::active_edit_line_info`].
struct ActiveEditLineInfo {
    /// Points to the start of the line that is currently being edited.
    safe_start: Cursor,
    /// Number of visual rows of the line that starts
    /// at [`ActiveEditLineInfo::safe_start`].
    line_height_in_rows: CoordType,
    /// Byte distance from the start of the line at
    /// [`ActiveEditLineInfo::safe_start`] to the next line.
    distance_next_line_start: usize,
}
151
/// Selects the granularity of cursor navigation: char-wise or word-wise.
pub enum CursorMovement {
    /// Char-wise navigation (one grapheme at a time, going by the name).
    Grapheme,
    /// Word-wise navigation.
    Word,
}
157
/// The result of a call to [`TextBuffer::render()`].
pub struct RenderResult {
    /// The largest visual X position that was encountered during rendering.
    pub visual_pos_x_max: CoordType,
}
163
/// A [`TextBuffer`] with inner mutability, provided by [`SemiRefCell`].
pub type TextBufferCell = SemiRefCell<TextBuffer>;
166
/// A [`TextBufferCell`] inside an [`Rc`], i.e. a shared, mutable [`TextBuffer`].
///
/// We need this because the TUI system needs to borrow
/// the given text buffer(s) until after the layout process.
pub type RcTextBuffer = Rc<TextBufferCell>;
172
/// A text buffer for a text editor.
///
/// Combines the raw text storage (a [`GapBuffer`]) with undo/redo history,
/// cursor and selection state, search state and the layout settings.
pub struct TextBuffer {
    /// The gap buffer holding the document contents.
    buffer: GapBuffer,

    /// Entries that can be undone.
    undo_stack: LinkedList<SemiRefCell<HistoryEntry>>,
    /// Entries that can be redone.
    redo_stack: LinkedList<SemiRefCell<HistoryEntry>>,
    /// Type of the most recent history action. See [`HistoryType`].
    last_history_type: HistoryType,
    /// Buffer generation at the time the buffer was last marked as clean.
    /// The buffer counts as dirty whenever this differs from the current generation.
    last_save_generation: u32,

    /// Cached line information for the edit that is in progress, if any.
    active_edit_line_info: Option<ActiveEditLineInfo>,
    // NOTE(review): presumably the nesting depth of in-progress edits; confirm at the use sites.
    active_edit_depth: i32,
    // NOTE(review): presumably the buffer offset of the in-progress edit; confirm at the use sites.
    active_edit_off: usize,

    /// Line counts for the whole document.
    stats: TextBufferStatistics,
    /// The current cursor.
    cursor: Cursor,
    // When scrolling significant amounts of text away from the cursor,
    // rendering will naturally slow down proportionally to the distance.
    // To avoid this, we cache the cursor position for rendering.
    // Must be cleared on every edit or reflow.
    cursor_for_rendering: Option<Cursor>,
    /// The active selection, if any.
    selection: Option<TextBufferSelection>,
    /// Compared against [`ActiveSearch::selection_generation`]
    /// to detect that the selection has changed.
    selection_generation: u32,
    /// The cached search operation, if any.
    search: Option<UnsafeCell<ActiveSearch>>,

    /// Total width available for layout, including the margin.
    /// Setting it to 0 forces a reflow on the next `set_width()` call.
    width: CoordType,
    /// Width of the left margin. 0 if the margin is disabled.
    margin_width: CoordType,
    /// Whether the left margin is enabled.
    margin_enabled: bool,
    /// Width at which lines are wrapped. 0 if wrapping is not in effect.
    word_wrap_column: CoordType,
    /// Whether word-wrap was requested.
    word_wrap_enabled: bool,
    /// Tab width. `set_tab_size()` clamps it to 1-8.
    tab_size: CoordType,
    /// Whether tabs (as opposed to spaces) are used for indentation.
    indent_with_tabs: bool,
    /// Whether the line the cursor is on should be highlighted.
    line_highlight_enabled: bool,
    /// The ruler column, e.g. 80.
    ruler: CoordType,
    /// The encoding used during reading/writing. "UTF-8" is the default.
    encoding: &'static str,
    /// Whether the newline type of the document is CRLF (as opposed to LF).
    newlines_are_crlf: bool,
    /// Whether to automatically insert a final newline
    /// when typing at the end of the file.
    insert_final_newline: bool,
    /// Whether text is overtyped instead of inserted when writing.
    overtype: bool,

    /// Set by `make_cursor_visible()`, cleared by `take_cursor_visibility_request()`.
    wants_cursor_visibility: bool,
}
213
214impl TextBuffer {
215    /// Creates a new text buffer inside an [`Rc`].
216    /// See [`TextBuffer::new()`].
217    pub fn new_rc(small: bool) -> apperr::Result<RcTextBuffer> {
218        let buffer = Self::new(small)?;
219        Ok(Rc::new(SemiRefCell::new(buffer)))
220    }
221
222    /// Creates a new text buffer. With `small` you can control
223    /// if the buffer is optimized for <1MiB contents.
224    pub fn new(small: bool) -> apperr::Result<Self> {
225        Ok(Self {
226            buffer: GapBuffer::new(small)?,
227
228            undo_stack: LinkedList::new(),
229            redo_stack: LinkedList::new(),
230            last_history_type: HistoryType::Other,
231            last_save_generation: 0,
232
233            active_edit_line_info: None,
234            active_edit_depth: 0,
235            active_edit_off: 0,
236
237            stats: TextBufferStatistics { logical_lines: 1, visual_lines: 1 },
238            cursor: Default::default(),
239            cursor_for_rendering: None,
240            selection: None,
241            selection_generation: 0,
242            search: None,
243
244            width: 0,
245            margin_width: 0,
246            margin_enabled: false,
247            word_wrap_column: 0,
248            word_wrap_enabled: false,
249            tab_size: 4,
250            indent_with_tabs: false,
251            line_highlight_enabled: false,
252            ruler: 0,
253            encoding: "UTF-8",
254            newlines_are_crlf: cfg!(windows), // Windows users want CRLF
255            insert_final_newline: false,
256            overtype: false,
257
258            wants_cursor_visibility: false,
259        })
260    }
261
    /// Length of the document in bytes, as reported by the underlying [`GapBuffer`].
    pub fn text_length(&self) -> usize {
        self.buffer.len()
    }
266
    /// Number of logical lines in the document,
    /// that is, lines separated by newlines.
    ///
    /// This is a cached statistic and not recomputed on each call.
    pub fn logical_line_count(&self) -> CoordType {
        self.stats.logical_lines
    }
272
    /// Number of visual lines in the document,
    /// that is, the number of lines after layout.
    ///
    /// This is a cached statistic and not recomputed on each call.
    pub fn visual_line_count(&self) -> CoordType {
        self.stats.visual_lines
    }
278
    /// Does the buffer need to be saved?
    ///
    /// True if the buffer generation differs from the one
    /// that was recorded when the buffer was last marked as clean.
    pub fn is_dirty(&self) -> bool {
        self.last_save_generation != self.buffer.generation()
    }
283
    /// Returns the generation of the underlying buffer.
    ///
    /// The buffer generation changes on every edit.
    /// With this you can check if it has changed since
    /// the last time you called this function.
    pub fn generation(&self) -> u32 {
        self.buffer.generation()
    }
290
    /// Force the buffer to be dirty.
    ///
    /// This doesn't touch the buffer itself. It only moves the remembered
    /// "last saved" generation to a value that differs from the current one.
    pub fn mark_as_dirty(&mut self) {
        // `wrapping_sub`, because the generation may be 0.
        self.last_save_generation = self.buffer.generation().wrapping_sub(1);
    }
295
    /// Marks the buffer as not dirty by remembering
    /// the current buffer generation as the "last saved" one.
    fn mark_as_clean(&mut self) {
        self.last_save_generation = self.buffer.generation();
    }
299
    /// The name of the encoding used during reading/writing. "UTF-8" is the default.
    pub fn encoding(&self) -> &'static str {
        self.encoding
    }
304
305    /// Set the encoding used during reading/writing.
306    pub fn set_encoding(&mut self, encoding: &'static str) {
307        if self.encoding != encoding {
308            self.encoding = encoding;
309            self.mark_as_dirty();
310        }
311    }
312
    /// The newline type used in the document.
    /// Returns `true` for CRLF and `false` for LF.
    pub fn is_crlf(&self) -> bool {
        self.newlines_are_crlf
    }
317
    /// Changes the newline type without normalizing the document.
    ///
    /// Existing newlines are left as they are. To rewrite them as well,
    /// use [`TextBuffer::normalize_newlines()`] instead.
    pub fn set_crlf(&mut self, crlf: bool) {
        self.newlines_are_crlf = crlf;
    }
322
323    /// Changes the newline type used in the document.
324    ///
325    /// NOTE: Cannot be undone.
326    pub fn normalize_newlines(&mut self, crlf: bool) {
327        let newline: &[u8] = if crlf { b"\r\n" } else { b"\n" };
328        let mut off = 0;
329
330        let mut cursor_offset = self.cursor.offset;
331        let mut cursor_for_rendering_offset =
332            self.cursor_for_rendering.map_or(cursor_offset, |c| c.offset);
333
334        #[cfg(debug_assertions)]
335        let mut adjusted_newlines = 0;
336
337        'outer: loop {
338            // Seek to the offset of the next line start.
339            loop {
340                let chunk = self.read_forward(off);
341                if chunk.is_empty() {
342                    break 'outer;
343                }
344
345                let (delta, line) = simd::lines_fwd(chunk, 0, 0, 1);
346                off += delta;
347                if line == 1 {
348                    break;
349                }
350            }
351
352            // Get the preceding newline.
353            let chunk = self.read_backward(off);
354            let chunk_newline_len = if chunk.ends_with(b"\r\n") { 2 } else { 1 };
355            let chunk_newline = &chunk[chunk.len() - chunk_newline_len..];
356
357            if chunk_newline != newline {
358                // If this newline is still before our cursor position, then it still has an effect on its offset.
359                // Any newline adjustments past that cursor position are irrelevant.
360                let delta = newline.len() as isize - chunk_newline_len as isize;
361                if off <= cursor_offset {
362                    cursor_offset = cursor_offset.saturating_add_signed(delta);
363                    #[cfg(debug_assertions)]
364                    {
365                        adjusted_newlines += 1;
366                    }
367                }
368                if off <= cursor_for_rendering_offset {
369                    cursor_for_rendering_offset =
370                        cursor_for_rendering_offset.saturating_add_signed(delta);
371                }
372
373                // Replace the newline.
374                off -= chunk_newline_len;
375                self.buffer.replace(off..off + chunk_newline_len, newline);
376                off += newline.len();
377            }
378        }
379
380        // If this fails, the cursor offset calculation above is wrong.
381        #[cfg(debug_assertions)]
382        debug_assert_eq!(adjusted_newlines, self.cursor.logical_pos.y);
383
384        self.cursor.offset = cursor_offset;
385        if let Some(cursor) = &mut self.cursor_for_rendering {
386            cursor.offset = cursor_for_rendering_offset;
387        }
388
389        self.newlines_are_crlf = crlf;
390    }
391
    /// If enabled, automatically insert a final newline
    /// when typing at the end of the file.
    ///
    /// This only stores the setting. The document itself is not modified by this call.
    pub fn set_insert_final_newline(&mut self, enabled: bool) {
        self.insert_final_newline = enabled;
    }
397
    /// Whether to insert or overtype text when writing.
    /// Returns `true` if overtype mode is on.
    pub fn is_overtype(&self) -> bool {
        self.overtype
    }
402
    /// Set the overtype mode. Pass `true` to overtype text
    /// when writing and `false` to insert it.
    pub fn set_overtype(&mut self, overtype: bool) {
        self.overtype = overtype;
    }
407
    /// Gets the logical cursor position, that is,
    /// the position in lines (`y`) and graphemes per line (`x`).
    pub fn cursor_logical_pos(&self) -> Point {
        self.cursor.logical_pos
    }
413
    /// Gets the visual cursor position, that is,
    /// the position in laid out rows (`y`) and columns (`x`).
    pub fn cursor_visual_pos(&self) -> Point {
        self.cursor.visual_pos
    }
419
    /// Gets the width of the left margin.
    /// This is 0 if the margin is disabled.
    pub fn margin_width(&self) -> CoordType {
        self.margin_width
    }
424
425    /// Is the left margin enabled?
426    pub fn set_margin_enabled(&mut self, enabled: bool) -> bool {
427        if self.margin_enabled == enabled {
428            false
429        } else {
430            self.margin_enabled = enabled;
431            self.reflow();
432            true
433        }
434    }
435
    /// Gets the width of the text contents for layout,
    /// that is, the total width minus the width of the left margin.
    pub fn text_width(&self) -> CoordType {
        self.width - self.margin_width
    }
440
    /// Ask the TUI system to scroll the buffer and make the cursor visible.
    ///
    /// This only sets a flag, which the TUI code is expected to pick up
    /// via [`TextBuffer::take_cursor_visibility_request()`].
    ///
    /// TODO: This function shows that [`TextBuffer`] is poorly abstracted
    /// away from the TUI system. The only reason this exists is so that
    /// if someone outside the TUI code enables word-wrap, the TUI code
    /// recognizes this and scrolls the cursor into view. But outside of this
    /// scrolling, views, etc., are all UI concerns = this should not be here.
    pub fn make_cursor_visible(&mut self) {
        self.wants_cursor_visibility = true;
    }
451
452    /// For the TUI code to retrieve a prior [`TextBuffer::make_cursor_visible()`] request.
453    pub fn take_cursor_visibility_request(&mut self) -> bool {
454        mem::take(&mut self.wants_cursor_visibility)
455    }
456
    /// Is word-wrap enabled?
    ///
    /// Technically, this is a misnomer, because it's line-wrapping.
    ///
    /// This returns the requested setting, which may be `true`
    /// even while the text area is too narrow for wrapping to take effect.
    pub fn is_word_wrap_enabled(&self) -> bool {
        self.word_wrap_enabled
    }
463
464    /// Enable or disable word-wrap.
465    ///
466    /// NOTE: It's expected that the tui code calls `set_width()` sometime after this.
467    /// This will then trigger the actual recalculation of the cursor position.
468    pub fn set_word_wrap(&mut self, enabled: bool) {
469        if self.word_wrap_enabled != enabled {
470            self.word_wrap_enabled = enabled;
471            self.width = 0; // Force a reflow.
472            self.make_cursor_visible();
473        }
474    }
475
476    /// Set the width available for layout.
477    ///
478    /// Ideally this would be a pure UI concern, but the text buffer needs this
479    /// so that it can abstract away  visual cursor movement such as "go a line up".
480    /// What would that even mean if it didn't know how wide a line is?
481    pub fn set_width(&mut self, width: CoordType) -> bool {
482        if width <= 0 || width == self.width {
483            false
484        } else {
485            self.width = width;
486            self.reflow();
487            true
488        }
489    }
490
    /// Gets the tab width. [`TextBuffer::set_tab_size()`] clamps it to 1-8.
    pub fn tab_size(&self) -> CoordType {
        self.tab_size
    }
495
496    /// Set the tab size. Clamped to 1-8.
497    pub fn set_tab_size(&mut self, width: CoordType) -> bool {
498        let width = width.clamp(1, 8);
499        if width == self.tab_size {
500            false
501        } else {
502            self.tab_size = width;
503            self.reflow();
504            true
505        }
506    }
507
    /// Returns whether tabs are used for indentation.
    /// `false` means that spaces are used.
    pub fn indent_with_tabs(&self) -> bool {
        self.indent_with_tabs
    }
512
    /// Sets whether tabs (`true`) or spaces (`false`) are used for indentation.
    ///
    /// This only stores the setting. Existing text is not modified by this call.
    pub fn set_indent_with_tabs(&mut self, indent_with_tabs: bool) {
        self.indent_with_tabs = indent_with_tabs;
    }
517
    /// Sets whether the line the cursor is on should be highlighted.
    ///
    /// This only stores the setting and doesn't trigger a reflow.
    pub fn set_line_highlight_enabled(&mut self, enabled: bool) {
        self.line_highlight_enabled = enabled;
    }
522
    /// Sets a ruler column, e.g. 80.
    ///
    /// This only stores the setting and doesn't trigger a reflow.
    pub fn set_ruler(&mut self, column: CoordType) {
        self.ruler = column;
    }
527
    /// Recalculates the layout of the buffer.
    ///
    /// This always recalculates the cursor position and the visual line count,
    /// even if the word-wrap column didn't change.
    pub fn reflow(&mut self) {
        self.reflow_internal(true);
    }
531
    /// Updates the layout state after the contents were changed.
    ///
    /// Unlike [`TextBuffer::reflow()`], this recalculates the cursor position
    /// and the visual line count only if the word-wrap column has changed.
    fn recalc_after_content_changed(&mut self) {
        self.reflow_internal(false);
    }
535
536    fn reflow_internal(&mut self, force: bool) {
537        let word_wrap_column_before = self.word_wrap_column;
538
539        {
540            // +1 onto logical_lines, because line numbers are 1-based.
541            // +1 onto log10, because we want the digit width and not the actual log10.
542            // +3 onto log10, because we append " | " to the line numbers to form the margin.
543            self.margin_width = if self.margin_enabled {
544                self.stats.logical_lines.ilog10() as CoordType + 4
545            } else {
546                0
547            };
548
549            let text_width = self.text_width();
550            // 2 columns are required, because otherwise wide glyphs wouldn't ever fit.
551            self.word_wrap_column =
552                if self.word_wrap_enabled && text_width >= 2 { text_width } else { 0 };
553        }
554
555        self.cursor_for_rendering = None;
556
557        if force || self.word_wrap_column != word_wrap_column_before {
558            // Recalculate the cursor position.
559            self.cursor = self.cursor_move_to_logical_internal(
560                if self.word_wrap_column > 0 {
561                    Default::default()
562                } else {
563                    self.goto_line_start(self.cursor, self.cursor.logical_pos.y)
564                },
565                self.cursor.logical_pos,
566            );
567
568            // Recalculate the line statistics.
569            if self.word_wrap_column > 0 {
570                let end = self.cursor_move_to_logical_internal(self.cursor, Point::MAX);
571                self.stats.visual_lines = end.visual_pos.y + 1;
572            } else {
573                self.stats.visual_lines = self.stats.logical_lines;
574            }
575        }
576    }
577
578    /// Replaces the entire buffer contents with the given `text`.
579    /// Assumes that the line count doesn't change.
580    pub fn copy_from_str(&mut self, text: &dyn ReadableDocument) {
581        if self.buffer.copy_from(text) {
582            self.recalc_after_content_swap();
583            self.cursor_move_to_logical(Point { x: CoordType::MAX, y: 0 });
584
585            let delete = self.buffer.len() - self.cursor.offset;
586            if delete != 0 {
587                self.buffer.allocate_gap(self.cursor.offset, 0, delete);
588            }
589        }
590    }
591
    /// Resets all state that depends on the buffer contents.
    /// To be called after the contents were replaced wholesale.
    ///
    /// This clears the undo/redo history, moves the cursor to its default
    /// position, removes the selection, marks the buffer as clean and reflows it.
    fn recalc_after_content_swap(&mut self) {
        // If the buffer was changed, nothing we previously saved can be relied upon.
        self.undo_stack.clear();
        self.redo_stack.clear();
        self.last_history_type = HistoryType::Other;
        self.cursor = Default::default();
        self.set_selection(None);
        self.mark_as_clean();
        // Must come last: reflowing recalculates the cursor that was reset above.
        self.reflow();
    }
602
    /// Copies the contents of the buffer into the given document `dst`.
    ///
    /// This counts as saving: The buffer is marked as clean afterwards.
    pub fn save_as_string(&mut self, dst: &mut dyn WriteableDocument) {
        self.buffer.copy_into(dst);
        self.mark_as_clean();
    }
608
609    /// Reads a file from disk into the text buffer, detecting encoding and BOM.
610    pub fn read_file(
611        &mut self,
612        file: &mut File,
613        encoding: Option<&'static str>,
614    ) -> apperr::Result<()> {
615        let scratch = scratch_arena(None);
616        let mut buf = scratch.alloc_uninit().transpose();
617        let mut first_chunk_len = 0;
618        let mut read = 0;
619
620        // Read enough bytes to detect the BOM.
621        while first_chunk_len < BOM_MAX_LEN {
622            read = file_read_uninit(file, &mut buf[first_chunk_len..])?;
623            if read == 0 {
624                break;
625            }
626            first_chunk_len += read;
627        }
628
629        if let Some(encoding) = encoding {
630            self.encoding = encoding;
631        } else {
632            let bom = detect_bom(unsafe { buf[..first_chunk_len].assume_init_ref() });
633            self.encoding = bom.unwrap_or("UTF-8");
634        }
635
636        // TODO: Since reading the file can fail, we should ensure that we also reset the cursor here.
637        // I don't do it, so that `recalc_after_content_swap()` works.
638        self.buffer.clear();
639
640        let done = read == 0;
641        if self.encoding == "UTF-8" {
642            self.read_file_as_utf8(file, &mut buf, first_chunk_len, done)?;
643        } else {
644            self.read_file_with_icu(file, &mut buf, first_chunk_len, done)?;
645        }
646
647        // Figure out
648        // * the logical line count
649        // * the newline type (LF or CRLF)
650        // * the indentation type (tabs or spaces)
651        // * whether there's a final newline
652        {
653            let chunk = self.read_forward(0);
654            let mut offset = 0;
655            let mut lines = 0;
656            // Number of lines ending in CRLF.
657            let mut crlf_count = 0;
658            // Number of lines starting with a tab.
659            let mut tab_indentations = 0;
660            // Number of lines starting with a space.
661            let mut space_indentations = 0;
662            // Histogram of the indentation depth of lines starting with between 2 and 8 spaces.
663            // In other words, `space_indentation_sizes[0]` is the number of lines starting with 2 spaces.
664            let mut space_indentation_sizes = [0; 7];
665
666            loop {
667                // Check if the line starts with a tab.
668                if offset < chunk.len() && chunk[offset] == b'\t' {
669                    tab_indentations += 1;
670                } else {
671                    // Otherwise, check how many spaces the line starts with. Searching for >8 spaces
672                    // allows us to reject lines that have more than 1 level of indentation.
673                    let space_indentation =
674                        chunk[offset..].iter().take(9).take_while(|&&c| c == b' ').count();
675
676                    // We'll also reject lines starting with 1 space, because that's too fickle as a heuristic.
677                    if (2..=8).contains(&space_indentation) {
678                        space_indentations += 1;
679
680                        // If we encounter an indentation depth of 6, it may either be a 6-space indentation,
681                        // two 3-space indentation or 3 2-space indentations. To make this work, we increment
682                        // all 3 possible histogram slots.
683                        //   2 -> 2
684                        //   3 -> 3
685                        //   4 -> 4 2
686                        //   5 -> 5
687                        //   6 -> 6 3 2
688                        //   7 -> 7
689                        //   8 -> 8 4 2
690                        space_indentation_sizes[space_indentation - 2] += 1;
691                        if space_indentation & 4 != 0 {
692                            space_indentation_sizes[0] += 1;
693                        }
694                        if space_indentation == 6 || space_indentation == 8 {
695                            space_indentation_sizes[space_indentation / 2 - 2] += 1;
696                        }
697                    }
698                }
699
700                (offset, lines) = simd::lines_fwd(chunk, offset, lines, lines + 1);
701
702                // Check if the preceding line ended in CRLF.
703                if offset >= 2 && &chunk[offset - 2..offset] == b"\r\n" {
704                    crlf_count += 1;
705                }
706
707                // We'll limit our heuristics to the first 1000 lines.
708                // That should hopefully be enough in practice.
709                if offset >= chunk.len() || lines >= 1000 {
710                    break;
711                }
712            }
713
714            // We'll assume CRLF if more than half of the lines end in CRLF.
715            let newlines_are_crlf = crlf_count >= lines / 2;
716
717            // We'll assume tabs if there are more lines starting with tabs than with spaces.
718            let indent_with_tabs = tab_indentations > space_indentations;
719            let tab_size = if indent_with_tabs {
720                // Tabs will get a visual size of 4 spaces by default.
721                4
722            } else {
723                // Otherwise, we'll assume the most common indentation depth.
724                // If there are conflicting indentation depths, we'll prefer the maximum, because in the loop
725                // above we incremented the histogram slot for 2-spaces when encountering 4-spaces and so on.
726                let mut max = 1;
727                let mut tab_size = 4;
728                for (i, &count) in space_indentation_sizes.iter().enumerate() {
729                    if count >= max {
730                        max = count;
731                        tab_size = i as CoordType + 2;
732                    }
733                }
734                tab_size
735            };
736
737            // If the file has more than 1000 lines, figure out how many are remaining.
738            if offset < chunk.len() {
739                (_, lines) = simd::lines_fwd(chunk, offset, lines, CoordType::MAX);
740            }
741
742            let final_newline = chunk.ends_with(b"\n");
743
744            // Add 1, because the last line doesn't end in a newline (it ends in the literal end).
745            self.stats.logical_lines = lines + 1;
746            self.stats.visual_lines = self.stats.logical_lines;
747            self.newlines_are_crlf = newlines_are_crlf;
748            self.insert_final_newline = final_newline;
749            self.indent_with_tabs = indent_with_tabs;
750            self.tab_size = tab_size;
751        }
752
753        self.recalc_after_content_swap();
754        Ok(())
755    }
756
757    fn read_file_as_utf8(
758        &mut self,
759        file: &mut File,
760        buf: &mut [MaybeUninit<u8>; 4 * KIBI],
761        first_chunk_len: usize,
762        done: bool,
763    ) -> apperr::Result<()> {
764        {
765            let mut first_chunk = unsafe { buf[..first_chunk_len].assume_init_ref() };
766            if first_chunk.starts_with(b"\xEF\xBB\xBF") {
767                first_chunk = &first_chunk[3..];
768                self.encoding = "UTF-8 BOM";
769            }
770
771            self.buffer.replace(0..0, first_chunk);
772        }
773
774        if done {
775            return Ok(());
776        }
777
778        // If we don't have file metadata, the input may be a pipe or a socket.
779        // Every read will have the same size until we hit the end.
780        let mut chunk_size = 128 * KIBI;
781        let mut extra_chunk_size = 128 * KIBI;
782
783        if let Ok(m) = file.metadata() {
784            // Usually the next read of size `chunk_size` will read the entire file,
785            // but if the size has changed for some reason, then `extra_chunk_size`
786            // should be large enough to read the rest of the file.
787            // 4KiB is not too large and not too slow.
788            let len = m.len() as usize;
789            chunk_size = len.saturating_sub(first_chunk_len);
790            extra_chunk_size = 4 * KIBI;
791        }
792
793        loop {
794            let gap = self.buffer.allocate_gap(self.text_length(), chunk_size, 0);
795            if gap.is_empty() {
796                break;
797            }
798
799            let read = file.read(gap)?;
800            if read == 0 {
801                break;
802            }
803
804            self.buffer.commit_gap(read);
805            chunk_size = extra_chunk_size;
806        }
807
808        Ok(())
809    }
810
    /// Loads the contents of `file` into the buffer, converting them
    /// from [`TextBuffer::encoding`] to UTF-8 with an ICU converter.
    ///
    /// `buf[..first_chunk_len]` holds the start of the file, which the caller
    /// has already read. It is converted first, and a UTF-8 BOM at the very start
    /// of the converted output is dropped. `done` states that the caller has already
    /// hit the end of the file, in which case no further reads are made.
    ///
    /// Returns an error if creating the converter, reading or converting fails.
    fn read_file_with_icu(
        &mut self,
        file: &mut File,
        buf: &mut [MaybeUninit<u8>; 4 * KIBI],
        first_chunk_len: usize,
        mut done: bool,
    ) -> apperr::Result<()> {
        let scratch = scratch_arena(None);
        let pivot_buffer = scratch.alloc_uninit_slice(4 * KIBI);
        let mut c = icu::Converter::new(pivot_buffer, self.encoding, "UTF-8")?;
        // SAFETY: Relies on the caller having filled the first `first_chunk_len` bytes of `buf`.
        let mut first_chunk = unsafe { buf[..first_chunk_len].assume_init_ref() };

        // Convert the part of the file that the caller has already read.
        while !first_chunk.is_empty() {
            let off = self.text_length();
            let gap = self.buffer.allocate_gap(off, 8 * KIBI, 0);
            let (input_advance, mut output_advance) =
                c.convert(first_chunk, slice_as_uninit_mut(gap))?;

            // Remove the BOM from the file, if this is the first chunk.
            // Our caller ensures to only call us once the BOM has been identified,
            // which means that if there's a BOM it must be wholly contained in this chunk.
            if off == 0 {
                let written = &mut gap[..output_advance];
                if written.starts_with(b"\xEF\xBB\xBF") {
                    written.copy_within(3.., 0);
                    output_advance -= 3;
                }
            }

            self.buffer.commit_gap(output_advance);
            first_chunk = &first_chunk[input_advance..];
        }

        // Number of bytes at the start of `buf` that were read but not converted yet.
        let mut buf_len = 0;

        loop {
            if !done {
                // Append to whatever input the converter didn't consume last time.
                // NOTE(review): If `buf` is ever completely full here, the read presumably
                // returns 0, which is treated as the end of the file - confirm that the
                // converter always consumes some input before that can happen.
                let read = file_read_uninit(file, &mut buf[buf_len..])?;
                buf_len += read;
                done = read == 0;
            }

            let gap = self.buffer.allocate_gap(self.text_length(), 8 * KIBI, 0);
            if gap.is_empty() {
                break;
            }

            // SAFETY: The first `buf_len` bytes were written by the reads above
            // (or moved there by the `copy_within` below).
            let read = unsafe { buf[..buf_len].assume_init_ref() };
            let (input_advance, output_advance) = c.convert(read, slice_as_uninit_mut(gap))?;

            self.buffer.commit_gap(output_advance);

            // Once the file is exhausted and there's no input left, the `convert` call
            // above was made with empty input (presumably to flush the converter) and we're done.
            let flush = done && buf_len == 0;
            // Move the unconsumed input to the front of `buf`.
            buf_len -= input_advance;
            buf.copy_within(input_advance.., 0);

            if flush {
                break;
            }
        }

        Ok(())
    }
874
875    /// Writes the text buffer contents to a file, handling BOM and encoding.
876    pub fn write_file(&mut self, file: &mut File) -> apperr::Result<()> {
877        let mut offset = 0;
878
879        if self.encoding.starts_with("UTF-8") {
880            if self.encoding == "UTF-8 BOM" {
881                file.write_all(b"\xEF\xBB\xBF")?;
882            }
883            loop {
884                let chunk = self.read_forward(offset);
885                if chunk.is_empty() {
886                    break;
887                }
888                file.write_all(chunk)?;
889                offset += chunk.len();
890            }
891        } else {
892            self.write_file_with_icu(file)?;
893        }
894
895        self.mark_as_clean();
896        Ok(())
897    }
898
899    fn write_file_with_icu(&mut self, file: &mut File) -> apperr::Result<()> {
900        let scratch = scratch_arena(None);
901        let pivot_buffer = scratch.alloc_uninit_slice(4 * KIBI);
902        let buf = scratch.alloc_uninit_slice(4 * KIBI);
903        let mut c = icu::Converter::new(pivot_buffer, "UTF-8", self.encoding)?;
904        let mut offset = 0;
905
906        // Write the BOM for the encodings we know need it.
907        if self.encoding.starts_with("UTF-16")
908            || self.encoding.starts_with("UTF-32")
909            || self.encoding == "GB18030"
910        {
911            let (_, output_advance) = c.convert(b"\xEF\xBB\xBF", buf)?;
912            let chunk = unsafe { buf[..output_advance].assume_init_ref() };
913            file.write_all(chunk)?;
914        }
915
916        loop {
917            let chunk = self.read_forward(offset);
918            if chunk.is_empty() {
919                break;
920            }
921
922            let (input_advance, output_advance) = c.convert(chunk, buf)?;
923            let chunk = unsafe { buf[..output_advance].assume_init_ref() };
924            file.write_all(chunk)?;
925            offset += input_advance;
926        }
927
928        Ok(())
929    }
930
931    /// Returns the current selection.
932    pub fn has_selection(&self) -> bool {
933        self.selection.is_some()
934    }
935
936    fn set_selection(&mut self, selection: Option<TextBufferSelection>) -> u32 {
937        self.selection = selection.filter(|s| s.beg != s.end);
938        self.selection_generation = self.selection_generation.wrapping_add(1);
939        self.selection_generation
940    }
941
942    /// Moves the cursor by `offset` and updates the selection to contain it.
943    pub fn selection_update_offset(&mut self, offset: usize) {
944        self.set_cursor_for_selection(self.cursor_move_to_offset_internal(self.cursor, offset));
945    }
946
947    /// Moves the cursor to `visual_pos` and updates the selection to contain it.
948    pub fn selection_update_visual(&mut self, visual_pos: Point) {
949        self.set_cursor_for_selection(self.cursor_move_to_visual_internal(self.cursor, visual_pos));
950    }
951
952    /// Moves the cursor to `logical_pos` and updates the selection to contain it.
953    pub fn selection_update_logical(&mut self, logical_pos: Point) {
954        self.set_cursor_for_selection(
955            self.cursor_move_to_logical_internal(self.cursor, logical_pos),
956        );
957    }
958
959    /// Moves the cursor by `delta` and updates the selection to contain it.
960    pub fn selection_update_delta(&mut self, granularity: CursorMovement, delta: CoordType) {
961        self.set_cursor_for_selection(self.cursor_move_delta_internal(
962            self.cursor,
963            granularity,
964            delta,
965        ));
966    }
967
968    /// Select the current word.
969    pub fn select_word(&mut self) {
970        let Range { start, end } = navigation::word_select(&self.buffer, self.cursor.offset);
971        let beg = self.cursor_move_to_offset_internal(self.cursor, start);
972        let end = self.cursor_move_to_offset_internal(beg, end);
973        unsafe { self.set_cursor(end) };
974        self.set_selection(Some(TextBufferSelection {
975            beg: beg.logical_pos,
976            end: end.logical_pos,
977        }));
978    }
979
980    /// Select the current line.
981    pub fn select_line(&mut self) {
982        let beg = self.cursor_move_to_logical_internal(
983            self.cursor,
984            Point { x: 0, y: self.cursor.logical_pos.y },
985        );
986        let end = self
987            .cursor_move_to_logical_internal(beg, Point { x: 0, y: self.cursor.logical_pos.y + 1 });
988        unsafe { self.set_cursor(end) };
989        self.set_selection(Some(TextBufferSelection {
990            beg: beg.logical_pos,
991            end: end.logical_pos,
992        }));
993    }
994
995    /// Select the entire document.
996    pub fn select_all(&mut self) {
997        let beg = Default::default();
998        let end = self.cursor_move_to_logical_internal(beg, Point::MAX);
999        unsafe { self.set_cursor(end) };
1000        self.set_selection(Some(TextBufferSelection {
1001            beg: beg.logical_pos,
1002            end: end.logical_pos,
1003        }));
1004    }
1005
1006    /// Starts a new selection, if there's none already.
1007    pub fn start_selection(&mut self) {
1008        if self.selection.is_none() {
1009            self.set_selection(Some(TextBufferSelection {
1010                beg: self.cursor.logical_pos,
1011                end: self.cursor.logical_pos,
1012            }));
1013        }
1014    }
1015
1016    /// Destroy the current selection.
1017    pub fn clear_selection(&mut self) -> bool {
1018        let had_selection = self.selection.is_some();
1019        self.set_selection(None);
1020        had_selection
1021    }
1022
1023    /// Find the next occurrence of the given `pattern` and select it.
1024    pub fn find_and_select(&mut self, pattern: &str, options: SearchOptions) -> apperr::Result<()> {
1025        if let Some(search) = &mut self.search {
1026            let search = search.get_mut();
1027            // When the search input changes we must reset the search.
1028            if search.pattern != pattern || search.options != options {
1029                self.search = None;
1030            }
1031
1032            // When transitioning from some search to no search, we must clear the selection.
1033            if pattern.is_empty()
1034                && let Some(TextBufferSelection { beg, .. }) = self.selection
1035            {
1036                self.cursor_move_to_logical(beg);
1037            }
1038        }
1039
1040        if pattern.is_empty() {
1041            return Ok(());
1042        }
1043
1044        let search = match &self.search {
1045            Some(search) => unsafe { &mut *search.get() },
1046            None => {
1047                let search = self.find_construct_search(pattern, options)?;
1048                self.search = Some(UnsafeCell::new(search));
1049                unsafe { &mut *self.search.as_ref().unwrap().get() }
1050            }
1051        };
1052
1053        // If we previously searched through the entire document and found 0 matches,
1054        // then we can avoid searching again.
1055        if search.no_matches {
1056            return Ok(());
1057        }
1058
1059        // If the user moved the cursor since the last search, but the needle remained the same,
1060        // we still need to move the start of the search to the new cursor position.
1061        let next_search_offset = match self.selection {
1062            Some(TextBufferSelection { beg, end }) => {
1063                if self.selection_generation == search.selection_generation {
1064                    search.next_search_offset
1065                } else {
1066                    self.cursor_move_to_logical_internal(self.cursor, beg.min(end)).offset
1067                }
1068            }
1069            _ => self.cursor.offset,
1070        };
1071
1072        self.find_select_next(search, next_search_offset, true);
1073        Ok(())
1074    }
1075
1076    /// Find the next occurrence of the given `pattern` and replace it with `replacement`.
1077    pub fn find_and_replace(
1078        &mut self,
1079        pattern: &str,
1080        options: SearchOptions,
1081        replacement: &str,
1082    ) -> apperr::Result<()> {
1083        // Editors traditionally replace the previous search hit, not the next possible one.
1084        if let (Some(search), Some(..)) = (&mut self.search, &self.selection) {
1085            let search = search.get_mut();
1086            if search.selection_generation == self.selection_generation {
1087                self.write(replacement.as_bytes(), self.cursor, true);
1088            }
1089        }
1090
1091        self.find_and_select(pattern, options)
1092    }
1093
1094    /// Find all occurrences of the given `pattern` and replace them with `replacement`.
1095    pub fn find_and_replace_all(
1096        &mut self,
1097        pattern: &str,
1098        options: SearchOptions,
1099        replacement: &str,
1100    ) -> apperr::Result<()> {
1101        let replacement = replacement.as_bytes();
1102        let mut search = self.find_construct_search(pattern, options)?;
1103        let mut offset = 0;
1104
1105        loop {
1106            self.find_select_next(&mut search, offset, false);
1107            if !self.has_selection() {
1108                break;
1109            }
1110            self.write(replacement, self.cursor, true);
1111            offset = self.cursor.offset;
1112        }
1113
1114        Ok(())
1115    }
1116
1117    fn find_construct_search(
1118        &self,
1119        pattern: &str,
1120        options: SearchOptions,
1121    ) -> apperr::Result<ActiveSearch> {
1122        if pattern.is_empty() {
1123            return Err(apperr::Error::Icu(1)); // U_ILLEGAL_ARGUMENT_ERROR
1124        }
1125
1126        let sanitized_pattern = if options.whole_word && options.use_regex {
1127            Cow::Owned(format!(r"\b(?:{pattern})\b"))
1128        } else if options.whole_word {
1129            let mut p = String::with_capacity(pattern.len() + 16);
1130            p.push_str(r"\b");
1131
1132            // Escape regex special characters.
1133            let b = unsafe { p.as_mut_vec() };
1134            for &byte in pattern.as_bytes() {
1135                match byte {
1136                    b'*' | b'?' | b'+' | b'[' | b'(' | b')' | b'{' | b'}' | b'^' | b'$' | b'|'
1137                    | b'\\' | b'.' => {
1138                        b.push(b'\\');
1139                        b.push(byte);
1140                    }
1141                    _ => b.push(byte),
1142                }
1143            }
1144
1145            p.push_str(r"\b");
1146            Cow::Owned(p)
1147        } else {
1148            Cow::Borrowed(pattern)
1149        };
1150
1151        let mut flags = icu::Regex::MULTILINE;
1152        if !options.match_case {
1153            flags |= icu::Regex::CASE_INSENSITIVE;
1154        }
1155        if !options.use_regex && !options.whole_word {
1156            flags |= icu::Regex::LITERAL;
1157        }
1158
1159        // Move the start of the search to the start of the selection,
1160        // or otherwise to the current cursor position.
1161
1162        let text = unsafe { icu::Text::new(self)? };
1163        let regex = unsafe { icu::Regex::new(&sanitized_pattern, flags, &text)? };
1164
1165        Ok(ActiveSearch {
1166            pattern: pattern.to_string(),
1167            options,
1168            text,
1169            regex,
1170            buffer_generation: self.buffer.generation(),
1171            selection_generation: 0,
1172            next_search_offset: 0,
1173            no_matches: false,
1174        })
1175    }
1176
1177    fn find_select_next(&mut self, search: &mut ActiveSearch, offset: usize, wrap: bool) {
1178        if search.buffer_generation != self.buffer.generation() {
1179            unsafe { search.regex.set_text(&mut search.text, offset) };
1180            search.buffer_generation = self.buffer.generation();
1181            search.next_search_offset = offset;
1182        } else if search.next_search_offset != offset {
1183            search.next_search_offset = offset;
1184            search.regex.reset(offset);
1185        }
1186
1187        let mut hit = search.regex.next();
1188
1189        // If we hit the end of the buffer, and we know that there's something to find,
1190        // start the search again from the beginning (= wrap around).
1191        if wrap && hit.is_none() && search.next_search_offset != 0 {
1192            search.next_search_offset = 0;
1193            search.regex.reset(0);
1194            hit = search.regex.next();
1195        }
1196
1197        search.selection_generation = if let Some(range) = hit {
1198            // Now the search offset is no more at the start of the buffer.
1199            search.next_search_offset = range.end;
1200
1201            let beg = self.cursor_move_to_offset_internal(self.cursor, range.start);
1202            let end = self.cursor_move_to_offset_internal(beg, range.end);
1203
1204            unsafe { self.set_cursor(end) };
1205            self.make_cursor_visible();
1206
1207            self.set_selection(Some(TextBufferSelection {
1208                beg: beg.logical_pos,
1209                end: end.logical_pos,
1210            }))
1211        } else {
1212            // Avoid searching through the entire document again if we know there's nothing to find.
1213            search.no_matches = true;
1214            self.set_selection(None)
1215        };
1216    }
1217
1218    fn measurement_config(&self) -> MeasurementConfig<'_> {
1219        MeasurementConfig::new(&self.buffer)
1220            .with_word_wrap_column(self.word_wrap_column)
1221            .with_tab_size(self.tab_size)
1222    }
1223
1224    fn goto_line_start(&self, cursor: Cursor, y: CoordType) -> Cursor {
1225        let mut result = cursor;
1226        let mut seek_to_line_start = true;
1227
1228        if y > result.logical_pos.y {
1229            while y > result.logical_pos.y {
1230                let chunk = self.read_forward(result.offset);
1231                if chunk.is_empty() {
1232                    break;
1233                }
1234
1235                let (delta, line) = simd::lines_fwd(chunk, 0, result.logical_pos.y, y);
1236                result.offset += delta;
1237                result.logical_pos.y = line;
1238            }
1239
1240            // If we're at the end of the buffer, we could either be there because the last
1241            // character in the buffer is genuinely a newline, or because the buffer ends in a
1242            // line of text without trailing newline. The only way to make sure is to seek
1243            // backwards to the line start again. But otherwise we can skip that.
1244            seek_to_line_start =
1245                result.offset == self.text_length() && result.offset != cursor.offset;
1246        }
1247
1248        if seek_to_line_start {
1249            loop {
1250                let chunk = self.read_backward(result.offset);
1251                if chunk.is_empty() {
1252                    break;
1253                }
1254
1255                let (delta, line) = simd::lines_bwd(chunk, chunk.len(), result.logical_pos.y, y);
1256                result.offset -= chunk.len() - delta;
1257                result.logical_pos.y = line;
1258                if delta > 0 {
1259                    break;
1260                }
1261            }
1262        }
1263
1264        if result.offset == cursor.offset {
1265            return result;
1266        }
1267
1268        result.logical_pos.x = 0;
1269        result.visual_pos.x = 0;
1270        result.visual_pos.y = result.logical_pos.y;
1271        result.column = 0;
1272        result.wrap_opp = false;
1273
1274        if self.word_wrap_column > 0 {
1275            let upward = result.offset < cursor.offset;
1276            let (top, bottom) = if upward { (result, cursor) } else { (cursor, result) };
1277
1278            let mut bottom_remeasured =
1279                self.measurement_config().with_cursor(top).goto_logical(bottom.logical_pos);
1280
1281            // The second problem is that visual positions can be ambiguous. A single logical position
1282            // can map to two visual positions: One at the end of the preceding line in front of
1283            // a word wrap, and another at the start of the next line after the same word wrap.
1284            //
1285            // This, however, only applies if we go upwards, because only then `bottom ≅ cursor`,
1286            // and thus only then this `bottom` is ambiguous. Otherwise, `bottom ≅ result`
1287            // and `result` is at a line start which is never ambiguous.
1288            if upward {
1289                let a = bottom_remeasured.visual_pos.x;
1290                let b = bottom.visual_pos.x;
1291                bottom_remeasured.visual_pos.y = bottom_remeasured.visual_pos.y
1292                    + (a != 0 && b == 0) as CoordType
1293                    - (a == 0 && b != 0) as CoordType;
1294            }
1295
1296            let mut delta = bottom_remeasured.visual_pos.y - top.visual_pos.y;
1297            if upward {
1298                delta = -delta;
1299            }
1300
1301            result.visual_pos.y = cursor.visual_pos.y + delta;
1302        }
1303
1304        result
1305    }
1306
1307    fn cursor_move_to_offset_internal(&self, mut cursor: Cursor, offset: usize) -> Cursor {
1308        if offset == cursor.offset {
1309            return cursor;
1310        }
1311
1312        // goto_line_start() is fast for seeking across lines _if_ line wrapping is disabled.
1313        // For backward seeking we have to use it either way, so we're covered there.
1314        // This implements the forward seeking portion, if it's approx. worth doing so.
1315        if self.word_wrap_column <= 0 && offset.saturating_sub(cursor.offset) > 1024 {
1316            // Replacing this with a more optimal, direct memchr() loop appears
1317            // to improve performance only marginally by another 2% or so.
1318            // Still, it's kind of "meh" looking at how poorly this is implemented...
1319            loop {
1320                let next = self.goto_line_start(cursor, cursor.logical_pos.y + 1);
1321                // Stop when we either ran past the target offset,
1322                // or when we hit the end of the buffer and `goto_line_start` backtracked to the line start.
1323                if next.offset > offset || next.offset <= cursor.offset {
1324                    break;
1325                }
1326                cursor = next;
1327            }
1328        }
1329
1330        while offset < cursor.offset {
1331            cursor = self.goto_line_start(cursor, cursor.logical_pos.y - 1);
1332        }
1333
1334        self.measurement_config().with_cursor(cursor).goto_offset(offset)
1335    }
1336
1337    fn cursor_move_to_logical_internal(&self, mut cursor: Cursor, pos: Point) -> Cursor {
1338        let pos = Point { x: pos.x.max(0), y: pos.y.max(0) };
1339
1340        if pos == cursor.logical_pos {
1341            return cursor;
1342        }
1343
1344        // goto_line_start() is the fastest way for seeking across lines. As such we always
1345        // use it if the requested `.y` position is different. We still need to use it if the
1346        // `.x` position is smaller, but only because `goto_logical()` cannot seek backwards.
1347        if pos.y != cursor.logical_pos.y || pos.x < cursor.logical_pos.x {
1348            cursor = self.goto_line_start(cursor, pos.y);
1349        }
1350
1351        self.measurement_config().with_cursor(cursor).goto_logical(pos)
1352    }
1353
1354    fn cursor_move_to_visual_internal(&self, mut cursor: Cursor, pos: Point) -> Cursor {
1355        let pos = Point { x: pos.x.max(0), y: pos.y.max(0) };
1356
1357        if pos == cursor.visual_pos {
1358            return cursor;
1359        }
1360
1361        if self.word_wrap_column <= 0 {
1362            // Identical to the fast-pass in `cursor_move_to_logical_internal()`.
1363            if pos.y != cursor.visual_pos.y || pos.x < cursor.visual_pos.x {
1364                cursor = self.goto_line_start(cursor, pos.y);
1365            }
1366        } else {
1367            // `goto_visual()` can only seek forward, so we need to seek backward here if needed.
1368            // NOTE that this intentionally doesn't use the `Eq` trait of `Point`, because if
1369            // `pos.y == cursor.visual_pos.y` we don't need to go to `cursor.logical_pos.y - 1`.
1370            while pos.y < cursor.visual_pos.y {
1371                cursor = self.goto_line_start(cursor, cursor.logical_pos.y - 1);
1372            }
1373            if pos.y == cursor.visual_pos.y && pos.x < cursor.visual_pos.x {
1374                cursor = self.goto_line_start(cursor, cursor.logical_pos.y);
1375            }
1376        }
1377
1378        self.measurement_config().with_cursor(cursor).goto_visual(pos)
1379    }
1380
1381    fn cursor_move_delta_internal(
1382        &self,
1383        mut cursor: Cursor,
1384        granularity: CursorMovement,
1385        mut delta: CoordType,
1386    ) -> Cursor {
1387        if delta == 0 {
1388            return cursor;
1389        }
1390
1391        let sign = if delta > 0 { 1 } else { -1 };
1392
1393        match granularity {
1394            CursorMovement::Grapheme => {
1395                let start_x = if delta > 0 { 0 } else { CoordType::MAX };
1396
1397                loop {
1398                    let target_x = cursor.logical_pos.x + delta;
1399
1400                    cursor = self.cursor_move_to_logical_internal(
1401                        cursor,
1402                        Point { x: target_x, y: cursor.logical_pos.y },
1403                    );
1404
1405                    // We can stop if we ran out of remaining delta
1406                    // (or perhaps ran past the goal; in either case the sign would've changed),
1407                    // or if we hit the beginning or end of the buffer.
1408                    delta = target_x - cursor.logical_pos.x;
1409                    if delta.signum() != sign
1410                        || (delta < 0 && cursor.offset == 0)
1411                        || (delta > 0 && cursor.offset >= self.text_length())
1412                    {
1413                        break;
1414                    }
1415
1416                    cursor = self.cursor_move_to_logical_internal(
1417                        cursor,
1418                        Point { x: start_x, y: cursor.logical_pos.y + sign },
1419                    );
1420
1421                    // We crossed a newline which counts for 1 grapheme cluster.
1422                    // So, we also need to run the same check again.
1423                    delta -= sign;
1424                    if delta.signum() != sign
1425                        || cursor.offset == 0
1426                        || cursor.offset >= self.text_length()
1427                    {
1428                        break;
1429                    }
1430                }
1431            }
1432            CursorMovement::Word => {
1433                let doc = &self.buffer as &dyn ReadableDocument;
1434                let mut offset = self.cursor.offset;
1435
1436                while delta != 0 {
1437                    if delta < 0 {
1438                        offset = navigation::word_backward(doc, offset);
1439                    } else {
1440                        offset = navigation::word_forward(doc, offset);
1441                    }
1442                    delta -= sign;
1443                }
1444
1445                cursor = self.cursor_move_to_offset_internal(cursor, offset);
1446            }
1447        }
1448
1449        cursor
1450    }
1451
1452    /// Moves the cursor to the given offset.
1453    pub fn cursor_move_to_offset(&mut self, offset: usize) {
1454        unsafe { self.set_cursor(self.cursor_move_to_offset_internal(self.cursor, offset)) }
1455    }
1456
1457    /// Moves the cursor to the given logical position.
1458    pub fn cursor_move_to_logical(&mut self, pos: Point) {
1459        unsafe { self.set_cursor(self.cursor_move_to_logical_internal(self.cursor, pos)) }
1460    }
1461
1462    /// Moves the cursor to the given visual position.
1463    pub fn cursor_move_to_visual(&mut self, pos: Point) {
1464        unsafe { self.set_cursor(self.cursor_move_to_visual_internal(self.cursor, pos)) }
1465    }
1466
1467    /// Moves the cursor by the given delta.
1468    pub fn cursor_move_delta(&mut self, granularity: CursorMovement, delta: CoordType) {
1469        unsafe { self.set_cursor(self.cursor_move_delta_internal(self.cursor, granularity, delta)) }
1470    }
1471
1472    /// Sets the cursor to the given position, and clears the selection.
1473    ///
1474    /// # Safety
1475    ///
1476    /// This function performs no checks that the cursor is valid. "Valid" in this case means
1477    /// that the TextBuffer has not been modified since you received the cursor from this class.
1478    pub unsafe fn set_cursor(&mut self, cursor: Cursor) {
1479        self.set_cursor_internal(cursor);
1480        self.last_history_type = HistoryType::Other;
1481        self.set_selection(None);
1482    }
1483
1484    fn set_cursor_for_selection(&mut self, cursor: Cursor) {
1485        let beg = match self.selection {
1486            Some(TextBufferSelection { beg, .. }) => beg,
1487            None => self.cursor.logical_pos,
1488        };
1489
1490        self.set_cursor_internal(cursor);
1491        self.last_history_type = HistoryType::Other;
1492
1493        let end = self.cursor.logical_pos;
1494        self.set_selection(if beg == end { None } else { Some(TextBufferSelection { beg, end }) });
1495    }
1496
/// Makes `cursor` the current cursor, without touching the selection or the history state.
///
/// In debug builds this asserts that the cursor is plausible for the current buffer.
/// Release builds perform no checks at all.
fn set_cursor_internal(&mut self, cursor: Cursor) {
    debug_assert!(
        // The offset must be inside the text.
        cursor.offset <= self.text_length()
            // The logical position must be non-negative and within the known line count.
            && cursor.logical_pos.x >= 0
            && cursor.logical_pos.y >= 0
            && cursor.logical_pos.y <= self.stats.logical_lines
            // The visual column must be non-negative and, if word wrap is enabled,
            // must not exceed the word wrap column.
            && cursor.visual_pos.x >= 0
            && (self.word_wrap_column <= 0 || cursor.visual_pos.x <= self.word_wrap_column)
            // The visual line must be non-negative and within the known line count.
            && cursor.visual_pos.y >= 0
            && cursor.visual_pos.y <= self.stats.visual_lines
    );
    self.cursor = cursor;
}
1510
1511    /// Extracts a rectangular region of the text buffer and writes it to the framebuffer.
1512    /// The `destination` rect is framebuffer coordinates. The extracted region within this
1513    /// text buffer has the given `origin` and the same size as the `destination` rect.
1514    pub fn render(
1515        &mut self,
1516        origin: Point,
1517        destination: Rect,
1518        focused: bool,
1519        fb: &mut Framebuffer,
1520    ) -> Option<RenderResult> {
1521        if destination.is_empty() {
1522            return None;
1523        }
1524
1525        let scratch = scratch_arena(None);
1526        let width = destination.width();
1527        let height = destination.height();
1528        let line_number_width = self.margin_width.max(3) as usize - 3;
1529        let text_width = width - self.margin_width;
1530        let mut visualizer_buf = [0xE2, 0x90, 0x80]; // U+2400 in UTF8
1531        let mut line = ArenaString::new_in(&scratch);
1532        let mut visual_pos_x_max = 0;
1533
1534        // Pick the cursor closer to the `origin.y`.
1535        let mut cursor = {
1536            let a = self.cursor;
1537            let b = self.cursor_for_rendering.unwrap_or_default();
1538            let da = (a.visual_pos.y - origin.y).abs();
1539            let db = (b.visual_pos.y - origin.y).abs();
1540            if da < db { a } else { b }
1541        };
1542
1543        let [selection_beg, selection_end] = match self.selection {
1544            None => [Point::MIN, Point::MIN],
1545            Some(TextBufferSelection { beg, end }) => minmax(beg, end),
1546        };
1547
1548        line.reserve(width as usize * 2);
1549
1550        for y in 0..height {
1551            line.clear();
1552
1553            let visual_line = origin.y + y;
1554            let mut cursor_beg =
1555                self.cursor_move_to_visual_internal(cursor, Point { x: origin.x, y: visual_line });
1556            let cursor_end = self.cursor_move_to_visual_internal(
1557                cursor_beg,
1558                Point { x: origin.x + text_width, y: visual_line },
1559            );
1560
1561            // Accelerate the next render pass by remembering where we started off.
1562            if y == 0 {
1563                self.cursor_for_rendering = Some(cursor_beg);
1564            }
1565
1566            if line_number_width != 0 {
1567                if visual_line >= self.stats.visual_lines {
1568                    // Past the end of the buffer? Place "    | " in the margin.
1569                    // Since we know that we won't see line numbers greater than i64::MAX (9223372036854775807)
1570                    // any time soon, we can use a static string as the template (`MARGIN`) and slice it,
1571                    // because `line_number_width` can't possibly be larger than 19.
1572                    let off = 19 - line_number_width;
1573                    unsafe { std::hint::assert_unchecked(off < MARGIN_TEMPLATE.len()) };
1574                    line.push_str(&MARGIN_TEMPLATE[off..]);
1575                } else if self.word_wrap_column <= 0 || cursor_beg.logical_pos.x == 0 {
1576                    // Regular line? Place "123 | " in the margin.
1577                    _ = write!(line, "{:1$} │ ", cursor_beg.logical_pos.y + 1, line_number_width);
1578                } else {
1579                    // Wrapped line? Place " ... | " in the margin.
1580                    let number_width = (cursor_beg.logical_pos.y + 1).ilog10() as usize + 1;
1581                    _ = write!(
1582                        line,
1583                        "{0:1$}{0:∙<2$} │ ",
1584                        "",
1585                        line_number_width - number_width,
1586                        number_width
1587                    );
1588                    // Blending in the background color will "dim" the indicator dots.
1589                    let left = destination.left;
1590                    let top = destination.top + y;
1591                    fb.blend_fg(
1592                        Rect {
1593                            left,
1594                            top,
1595                            right: left + line_number_width as CoordType,
1596                            bottom: top + 1,
1597                        },
1598                        fb.indexed_alpha(IndexedColor::Background, 1, 2),
1599                    );
1600                }
1601            }
1602
1603            // Nothing to do if the entire line is empty.
1604            if cursor_beg.offset != cursor_end.offset {
1605                // If we couldn't reach the left edge, we may have stopped short due to a wide glyph.
1606                // In that case we'll try to find the next character and then compute by how many
1607                // columns it overlaps the left edge (can be anything between 1 and 7).
1608                if cursor_beg.visual_pos.x < origin.x {
1609                    let cursor_next = self.cursor_move_to_logical_internal(
1610                        cursor_beg,
1611                        Point { x: cursor_beg.logical_pos.x + 1, y: cursor_beg.logical_pos.y },
1612                    );
1613
1614                    if cursor_next.visual_pos.x > origin.x {
1615                        let overlap = cursor_next.visual_pos.x - origin.x;
1616                        debug_assert!((1..=7).contains(&overlap));
1617                        line.push_str(&TAB_WHITESPACE[..overlap as usize]);
1618                        cursor_beg = cursor_next;
1619                    }
1620                }
1621
1622                fn find_control_char(text: &[u8], mut offset: usize) -> usize {
1623                    while offset < text.len() && (text[offset] >= 0x20 && text[offset] != 0x7f) {
1624                        offset += 1;
1625                    }
1626                    offset
1627                }
1628
1629                let mut global_off = cursor_beg.offset;
1630                let mut cursor_tab = cursor_beg;
1631                let mut cursor_visualizer = cursor_beg;
1632
1633                while global_off < cursor_end.offset {
1634                    let chunk = self.read_forward(global_off);
1635                    let chunk = &chunk[..chunk.len().min(cursor_end.offset - global_off)];
1636
1637                    let mut chunk_off = 0;
1638                    while chunk_off < chunk.len() {
1639                        let beg = chunk_off;
1640                        chunk_off = find_control_char(chunk, beg);
1641
1642                        for chunk in chunk[beg..chunk_off].utf8_chunks() {
1643                            if !chunk.valid().is_empty() {
1644                                line.push_str(chunk.valid());
1645                            }
1646                            if !chunk.invalid().is_empty() {
1647                                line.push('\u{FFFD}');
1648                            }
1649                        }
1650
1651                        while chunk_off < chunk.len()
1652                            && (chunk[chunk_off] < 0x20 || chunk[chunk_off] == 0x7f)
1653                        {
1654                            let ch = chunk[chunk_off];
1655                            chunk_off += 1;
1656
1657                            if ch == b'\t' {
1658                                cursor_tab = self.cursor_move_to_offset_internal(
1659                                    cursor_tab,
1660                                    global_off + chunk_off - 1,
1661                                );
1662                                let tab_size = self.tab_size - (cursor_tab.column % self.tab_size);
1663                                line.push_str(&TAB_WHITESPACE[..tab_size as usize]);
1664
1665                                // Since we know that we just aligned ourselves to the next tab stop,
1666                                // we can trivially process any successive tabs.
1667                                while chunk_off < chunk.len() && chunk[chunk_off] == b'\t' {
1668                                    line.push_str(&TAB_WHITESPACE[..self.tab_size as usize]);
1669                                    chunk_off += 1;
1670                                }
1671                                continue;
1672                            }
1673
1674                            visualizer_buf[2] = if ch == 0x7F {
1675                                0xA1 // U+2421
1676                            } else {
1677                                0x80 | ch // 0x00..=0x1F => U+2400..=U+241F
1678                            };
1679                            // Our manually constructed UTF8 is never going to be invalid. Trust.
1680                            line.push_str(unsafe { str::from_utf8_unchecked(&visualizer_buf) });
1681
1682                            cursor_visualizer = self.cursor_move_to_offset_internal(
1683                                cursor_visualizer,
1684                                global_off + chunk_off - 1,
1685                            );
1686                            let visualizer_rect = {
1687                                let left = destination.left
1688                                    + self.margin_width
1689                                    + cursor_visualizer.visual_pos.x
1690                                    - origin.x;
1691                                let top =
1692                                    destination.top + cursor_visualizer.visual_pos.y - origin.y;
1693                                Rect { left, top, right: left + 1, bottom: top + 1 }
1694                            };
1695
1696                            let bg = fb.indexed(IndexedColor::Yellow);
1697                            let fg = fb.contrasted(bg);
1698                            fb.blend_bg(visualizer_rect, bg);
1699                            fb.blend_fg(visualizer_rect, fg);
1700                        }
1701                    }
1702
1703                    global_off += chunk.len();
1704                }
1705
1706                visual_pos_x_max = visual_pos_x_max.max(cursor_end.visual_pos.x);
1707            }
1708
1709            fb.replace_text(destination.top + y, destination.left, destination.right, &line);
1710
1711            // Draw the selection on this line, if any.
1712            // FYI: `cursor_beg.visual_pos.y == visual_line` is necessary as the `visual_line`
1713            // may be past the end of the document, and so it may not receive a highlight.
1714            if cursor_beg.visual_pos.y == visual_line
1715                && selection_beg <= cursor_end.logical_pos
1716                && selection_end >= cursor_beg.logical_pos
1717            {
1718                // By default, we assume the entire line is selected.
1719                let mut beg = 0;
1720                let mut end = COORD_TYPE_SAFE_MAX;
1721                let mut cursor = cursor_beg;
1722
1723                // The start of the selection is within this line. We need to update selection_beg.
1724                if selection_beg <= cursor_end.logical_pos
1725                    && selection_beg >= cursor_beg.logical_pos
1726                {
1727                    cursor = self.cursor_move_to_logical_internal(cursor, selection_beg);
1728                    beg = cursor.visual_pos.x;
1729                }
1730
1731                // The end of the selection is within this line. We need to update selection_end.
1732                if selection_end <= cursor_end.logical_pos
1733                    && selection_end >= cursor_beg.logical_pos
1734                {
1735                    cursor = self.cursor_move_to_logical_internal(cursor, selection_end);
1736                    end = cursor.visual_pos.x;
1737                }
1738
1739                beg = beg.max(origin.x);
1740                end = end.min(origin.x + text_width);
1741
1742                let left = destination.left + self.margin_width - origin.x;
1743                let top = destination.top + y;
1744                let rect = Rect { left: left + beg, top, right: left + end, bottom: top + 1 };
1745
1746                let mut bg = oklab_blend(
1747                    fb.indexed(IndexedColor::Foreground),
1748                    fb.indexed_alpha(IndexedColor::BrightBlue, 1, 2),
1749                );
1750                if !focused {
1751                    bg = oklab_blend(bg, fb.indexed_alpha(IndexedColor::Background, 1, 2))
1752                };
1753                let fg = fb.contrasted(bg);
1754                fb.blend_bg(rect, bg);
1755                fb.blend_fg(rect, fg);
1756            }
1757
1758            cursor = cursor_end;
1759        }
1760
1761        // Colorize the margin that we wrote above.
1762        if self.margin_width > 0 {
1763            let margin = Rect {
1764                left: destination.left,
1765                top: destination.top,
1766                right: destination.left + self.margin_width,
1767                bottom: destination.bottom,
1768            };
1769            fb.blend_fg(margin, 0x7f3f3f3f);
1770        }
1771
1772        if self.ruler > 0 {
1773            let left = destination.left + self.margin_width + (self.ruler - origin.x).max(0);
1774            let right = destination.right;
1775            if left < right {
1776                fb.blend_bg(
1777                    Rect { left, top: destination.top, right, bottom: destination.bottom },
1778                    fb.indexed_alpha(IndexedColor::BrightRed, 1, 4),
1779                );
1780            }
1781        }
1782
1783        if focused {
1784            let mut x = self.cursor.visual_pos.x;
1785            let mut y = self.cursor.visual_pos.y;
1786
1787            if self.word_wrap_column > 0 && x >= self.word_wrap_column {
1788                // The line the cursor is on wraps exactly on the word wrap column which
1789                // means the cursor is invisible. We need to move it to the next line.
1790                x = 0;
1791                y += 1;
1792            }
1793
1794            // Move the cursor into screen space.
1795            x += destination.left - origin.x + self.margin_width;
1796            y += destination.top - origin.y;
1797
1798            let cursor = Point { x, y };
1799            let text = Rect {
1800                left: destination.left + self.margin_width,
1801                top: destination.top,
1802                right: destination.right,
1803                bottom: destination.bottom,
1804            };
1805
1806            if text.contains(cursor) {
1807                fb.set_cursor(cursor, self.overtype);
1808
1809                if self.line_highlight_enabled && selection_beg >= selection_end {
1810                    fb.blend_bg(
1811                        Rect {
1812                            left: destination.left,
1813                            top: cursor.y,
1814                            right: destination.right,
1815                            bottom: cursor.y + 1,
1816                        },
1817                        0x50282828,
1818                    );
1819                }
1820            }
1821        }
1822
1823        Some(RenderResult { visual_pos_x_max })
1824    }
1825
1826    pub fn cut(&mut self, clipboard: &mut Clipboard) {
1827        self.cut_copy(clipboard, true);
1828    }
1829
1830    pub fn copy(&mut self, clipboard: &mut Clipboard) {
1831        self.cut_copy(clipboard, false);
1832    }
1833
1834    fn cut_copy(&mut self, clipboard: &mut Clipboard, cut: bool) {
1835        let line_copy = !self.has_selection();
1836        let selection = self.extract_selection(cut);
1837        clipboard.write(selection);
1838        clipboard.write_was_line_copy(line_copy);
1839    }
1840
1841    pub fn paste(&mut self, clipboard: &Clipboard) {
1842        let data = clipboard.read();
1843        if data.is_empty() {
1844            return;
1845        }
1846
1847        let pos = self.cursor_logical_pos();
1848        let at = if clipboard.is_line_copy() {
1849            self.goto_line_start(self.cursor, pos.y)
1850        } else {
1851            self.cursor
1852        };
1853
1854        self.write(data, at, true);
1855
1856        if clipboard.is_line_copy() {
1857            self.cursor_move_to_logical(Point { x: pos.x, y: pos.y + 1 });
1858        }
1859    }
1860
1861    /// Inserts the user input `text` at the current cursor position.
1862    /// Replaces tabs with whitespace if needed, etc.
1863    pub fn write_canon(&mut self, text: &[u8]) {
1864        self.write(text, self.cursor, false);
1865    }
1866
1867    /// Inserts `text` as-is at the current cursor position.
1868    /// The only transformation applied is that newlines are normalized.
1869    pub fn write_raw(&mut self, text: &[u8]) {
1870        self.write(text, self.cursor, true);
1871    }
1872
    /// Shared implementation behind `write_canon`, `write_raw` and `paste`.
    ///
    /// * Any active selection is deleted first, even if `text` is empty. In that case
    ///   the edit starts at the beginning of the selection and `at` is not used.
    /// * Otherwise, if no edit is active yet, the edit starts at `at`.
    /// * Every CR, LF or CRLF in `text` is replaced with the buffer's own newline style.
    /// * If `raw` is `false`: tabs are expanded to spaces (unless `indent_with_tabs` is set),
    ///   overtype mode deletes as many columns as were written, and each inserted newline
    ///   is followed by the leading indentation of the line the cursor was on.
    /// * `raw` also selects the history type: `HistoryType::Other` for raw writes,
    ///   `HistoryType::Write` otherwise.
    fn write(&mut self, text: &[u8], at: Cursor, raw: bool) {
        let history_type = if raw { HistoryType::Other } else { HistoryType::Write };

        // If we have an active selection, writing an empty `text`
        // will still delete the selection. As such, we check this first.
        if let Some((beg, end)) = self.selection_range_internal(false) {
            self.edit_begin(history_type, beg);
            self.edit_delete(end);
            self.set_selection(None);
        }

        // If the text is empty the remaining code won't do anything,
        // allowing us to exit early.
        if text.is_empty() {
            // ...we still need to end any active edit session though.
            if self.active_edit_depth > 0 {
                self.edit_end();
            }
            return;
        }

        // Only begin a new edit if the selection handling above (or a caller) didn't already do so.
        if self.active_edit_depth <= 0 {
            self.edit_begin(history_type, at);
        }

        // `offset` is our read position within `text`.
        let mut offset = 0;
        let scratch = scratch_arena(None);
        // Reused for every line break: holds the newline plus any carried-over indentation.
        let mut newline_buffer = ArenaString::new_in(&scratch);

        loop {
            // Can't use `unicode::newlines_forward` because bracketed paste uses CR instead of LF/CRLF.
            let offset_next = memchr2(b'\r', b'\n', text, offset);
            let line = &text[offset..offset_next];
            // Remembered so that overtype mode knows how many columns this line added.
            let column_before = self.cursor.logical_pos.x;

            // Write the contents of the line into the buffer.
            let mut line_off = 0;
            while line_off < line.len() {
                // Split the line into chunks of non-tabs and tabs.
                // If tabs are kept as-is (raw mode or `indent_with_tabs`), the entire line is written in one go.
                let mut plain = line;
                if !raw && !self.indent_with_tabs {
                    // NOTE(review): passing the same needle twice presumably
                    // makes `memchr2` act as a single-byte search.
                    let end = memchr2(b'\t', b'\t', line, line_off);
                    plain = &line[line_off..end];
                }

                // Non-tabs are written as-is, because the outer loop already handles newline translation.
                self.edit_write(plain);
                line_off += plain.len();

                // Now replace tabs with spaces.
                // Each tab is padded up to the next tab stop relative to the cursor column.
                while line_off < line.len() && line[line_off] == b'\t' {
                    let spaces = self.tab_size - (self.cursor.column % self.tab_size);
                    let spaces = &TAB_WHITESPACE.as_bytes()[..spaces as usize];
                    self.edit_write(spaces);
                    line_off += 1;
                }
            }

            // In overtype mode, delete as many logical columns
            // after the cursor as this line just added in front of it.
            if !raw && self.overtype {
                let delete = self.cursor.logical_pos.x - column_before;
                let end = self.cursor_move_to_logical_internal(
                    self.cursor,
                    Point { x: self.cursor.logical_pos.x + delete, y: self.cursor.logical_pos.y },
                );
                self.edit_delete(end);
            }

            // If the line was the last thing in `text`, there's no trailing line break to translate.
            offset += line.len();
            if offset >= text.len() {
                break;
            }

            // First, write the newline.
            newline_buffer.clear();
            newline_buffer.push_str(if self.newlines_are_crlf { "\r\n" } else { "\n" });

            if !raw {
                // We'll give the next line the same indentation as the previous one.
                // This block figures out how much that is. We can't reuse that value,
                // because "  a\n  a\n" should give the 3rd line a total indentation of 4.
                // Assuming your terminal has bracketed paste, this won't be a concern though.
                // (If it doesn't, use a different terminal.)
                let tab_size = self.tab_size as usize;
                let line_beg = self.goto_line_start(self.cursor, self.cursor.logical_pos.y);
                let limit = self.cursor.offset;
                let mut off = line_beg.offset;
                let mut newline_indentation = 0usize;

                // Count the width of the leading spaces/tabs between the line start and the cursor.
                'outer: while off < limit {
                    let chunk = self.read_forward(off);
                    let chunk = &chunk[..chunk.len().min(limit - off)];

                    for &c in chunk {
                        if c == b' ' {
                            newline_indentation += 1;
                        } else if c == b'\t' {
                            newline_indentation += tab_size - (newline_indentation % tab_size);
                        } else {
                            break 'outer;
                        }
                    }

                    off += chunk.len();
                }

                // If tabs are enabled, add as many tabs as we can.
                if self.indent_with_tabs {
                    let tab_count = newline_indentation / tab_size;
                    newline_buffer.push_repeat('\t', tab_count);
                    newline_indentation -= tab_count * tab_size;
                }

                // If tabs are disabled, or if the indentation wasn't a multiple of the tab size,
                // add spaces to make up the difference.
                newline_buffer.push_repeat(' ', newline_indentation);
            }

            self.edit_write(newline_buffer.as_bytes());

            // Skip one CR/LF/CRLF.
            if offset >= text.len() {
                break;
            }
            if text[offset] == b'\r' {
                offset += 1;
            }
            if offset >= text.len() {
                break;
            }
            if text[offset] == b'\n' {
                offset += 1;
            }
            if offset >= text.len() {
                break;
            }
        }

        // POSIX mandates that all valid lines end in a newline.
        // This isn't all that common on Windows and so we have
        // `self.insert_final_newline` to control this.
        //
        // In order to not annoy people with this, we only add a
        // newline if you just edited the very end of the buffer.
        if self.insert_final_newline
            && self.cursor.offset > 0
            && self.cursor.offset == self.text_length()
            && self.cursor.logical_pos.x > 0
        {
            // The cursor is restored afterwards, so that it stays in front of the added newline.
            let cursor = self.cursor;
            self.edit_write(if self.newlines_are_crlf { b"\r\n" } else { b"\n" });
            self.set_cursor_internal(cursor);
        }

        self.edit_end();
    }
2028
2029    /// Deletes 1 grapheme cluster from the buffer.
2030    /// `cursor_movements` is expected to be -1 for backspace and 1 for delete.
2031    /// If there's a current selection, it will be deleted and `cursor_movements` ignored.
2032    /// The selection is cleared after the call.
2033    /// Deletes characters from the buffer based on a delta from the cursor.
2034    pub fn delete(&mut self, granularity: CursorMovement, delta: CoordType) {
2035        if delta == 0 {
2036            return;
2037        }
2038
2039        let mut beg;
2040        let mut end;
2041
2042        if let Some(r) = self.selection_range_internal(false) {
2043            (beg, end) = r;
2044        } else {
2045            if (delta < 0 && self.cursor.offset == 0)
2046                || (delta > 0 && self.cursor.offset >= self.text_length())
2047            {
2048                // Nothing to delete.
2049                return;
2050            }
2051
2052            beg = self.cursor;
2053            end = self.cursor_move_delta_internal(beg, granularity, delta);
2054            if beg.offset == end.offset {
2055                return;
2056            }
2057            if beg.offset > end.offset {
2058                mem::swap(&mut beg, &mut end);
2059            }
2060        }
2061
2062        self.edit_begin(HistoryType::Delete, beg);
2063        self.edit_delete(end);
2064        self.edit_end();
2065
2066        self.set_selection(None);
2067    }
2068
2069    /// Returns the logical position of the first character on this line.
2070    /// Return `.x == 0` if there are no non-whitespace characters.
2071    pub fn indent_end_logical_pos(&self) -> Point {
2072        let cursor = self.goto_line_start(self.cursor, self.cursor.logical_pos.y);
2073        let mut chars = 0;
2074        let mut offset = cursor.offset;
2075
2076        'outer: loop {
2077            let chunk = self.read_forward(offset);
2078            if chunk.is_empty() {
2079                break;
2080            }
2081
2082            for &c in chunk {
2083                if c == b'\n' || c == b'\r' || (c != b' ' && c != b'\t') {
2084                    break 'outer;
2085                }
2086                chars += 1;
2087            }
2088
2089            offset += chunk.len();
2090        }
2091
2092        Point { x: chars, y: cursor.logical_pos.y }
2093    }
2094
2095    /// Unindents the current selection or line.
2096    ///
2097    /// TODO: This function is ripe for some optimizations:
2098    /// * Instead of replacing the entire selection,
2099    ///   it should unindent each line directly (as if multiple cursors had been used).
2100    /// * The cursor movement at the end is rather costly, but at least without word wrap
2101    ///   it should be possible to calculate it directly from the removed amount.
2102    pub fn unindent(&mut self) {
2103        let mut selection_beg = self.cursor.logical_pos;
2104        let mut selection_end = selection_beg;
2105
2106        if let Some(TextBufferSelection { beg, end }) = self.selection {
2107            selection_beg = beg;
2108            selection_end = end;
2109        }
2110
2111        let [beg, end] = minmax(selection_beg, selection_end);
2112        let beg = self.cursor_move_to_logical_internal(self.cursor, Point { x: 0, y: beg.y });
2113        let end = self.cursor_move_to_logical_internal(beg, Point { x: CoordType::MAX, y: end.y });
2114
2115        let mut replacement = Vec::new();
2116        self.buffer.extract_raw(beg.offset..end.offset, &mut replacement, 0);
2117
2118        let initial_len = replacement.len();
2119        let mut offset = 0;
2120        let mut y = beg.logical_pos.y;
2121
2122        loop {
2123            if offset >= replacement.len() {
2124                break;
2125            }
2126
2127            let mut remove = 0;
2128
2129            if replacement[offset] == b'\t' {
2130                remove = 1;
2131            } else {
2132                while remove < self.tab_size as usize
2133                    && offset + remove < replacement.len()
2134                    && replacement[offset + remove] == b' '
2135                {
2136                    remove += 1;
2137                }
2138            }
2139
2140            if remove > 0 {
2141                replacement.drain(offset..offset + remove);
2142            }
2143
2144            if y == selection_beg.y {
2145                selection_beg.x -= remove as CoordType;
2146            }
2147            if y == selection_end.y {
2148                selection_end.x -= remove as CoordType;
2149            }
2150
2151            (offset, y) = simd::lines_fwd(&replacement, offset, y, y + 1);
2152        }
2153
2154        if replacement.len() == initial_len {
2155            // Nothing to do.
2156            return;
2157        }
2158
2159        self.edit_begin(HistoryType::Other, beg);
2160        self.edit_delete(end);
2161        self.edit_write(&replacement);
2162        self.edit_end();
2163
2164        if let Some(TextBufferSelection { beg, end }) = &mut self.selection {
2165            *beg = selection_beg;
2166            *end = selection_end;
2167        }
2168
2169        self.set_cursor_internal(self.cursor_move_to_logical_internal(self.cursor, selection_end));
2170    }
2171
2172    /// Extracts the contents of the current selection.
2173    /// May optionally delete it, if requested. This is meant to be used for Ctrl+X.
2174    fn extract_selection(&mut self, delete: bool) -> Vec<u8> {
2175        let line_copy = !self.has_selection();
2176        let Some((beg, end)) = self.selection_range_internal(true) else {
2177            return Vec::new();
2178        };
2179
2180        let mut out = Vec::new();
2181        self.buffer.extract_raw(beg.offset..end.offset, &mut out, 0);
2182
2183        if delete && !out.is_empty() {
2184            self.edit_begin(HistoryType::Delete, beg);
2185            self.edit_delete(end);
2186            self.edit_end();
2187            self.set_selection(None);
2188        }
2189
2190        // Line copies (= Ctrl+C when there's no selection) always end with a newline.
2191        if line_copy && !out.ends_with(b"\n") {
2192            out.replace_range(out.len().., if self.newlines_are_crlf { b"\r\n" } else { b"\n" });
2193        }
2194
2195        out
2196    }
2197
2198    /// Extracts the contents of the current selection the user made.
2199    /// This differs from [`TextBuffer::extract_selection()`] in that
2200    /// it does nothing if the selection was made by searching.
2201    pub fn extract_user_selection(&mut self, delete: bool) -> Option<Vec<u8>> {
2202        if !self.has_selection() {
2203            return None;
2204        }
2205
2206        if let Some(search) = &self.search {
2207            let search = unsafe { &*search.get() };
2208            if search.selection_generation == self.selection_generation {
2209                return None;
2210            }
2211        }
2212
2213        Some(self.extract_selection(delete))
2214    }
2215
2216    /// Returns the current selection anchors, or `None` if there
2217    /// is no selection. The returned logical positions are sorted.
2218    pub fn selection_range(&self) -> Option<(Cursor, Cursor)> {
2219        self.selection_range_internal(false)
2220    }
2221
2222    /// Returns the current selection anchors.
2223    ///
2224    /// If there's no selection and `line_fallback` is `true`,
2225    /// the start/end of the current line are returned.
2226    /// This is meant to be used for Ctrl+C / Ctrl+X.
2227    fn selection_range_internal(&self, line_fallback: bool) -> Option<(Cursor, Cursor)> {
2228        let [beg, end] = match self.selection {
2229            None if !line_fallback => return None,
2230            None => [
2231                Point { x: 0, y: self.cursor.logical_pos.y },
2232                Point { x: 0, y: self.cursor.logical_pos.y + 1 },
2233            ],
2234            Some(TextBufferSelection { beg, end }) => minmax(beg, end),
2235        };
2236
2237        let beg = self.cursor_move_to_logical_internal(self.cursor, beg);
2238        let end = self.cursor_move_to_logical_internal(beg, end);
2239
2240        if beg.offset < end.offset { Some((beg, end)) } else { None }
2241    }
2242
2243    /// Starts a new edit operation.
2244    /// This is used for tracking the undo/redo history.
2245    fn edit_begin(&mut self, history_type: HistoryType, cursor: Cursor) {
2246        self.active_edit_depth += 1;
2247        if self.active_edit_depth > 1 {
2248            return;
2249        }
2250
2251        let cursor_before = self.cursor;
2252        self.set_cursor_internal(cursor);
2253
2254        // If both the last and this are a Write/Delete operation, we skip allocating a new undo history item.
2255        if history_type != self.last_history_type
2256            || !matches!(history_type, HistoryType::Write | HistoryType::Delete)
2257        {
2258            self.redo_stack.clear();
2259            while self.undo_stack.len() > 1000 {
2260                self.undo_stack.pop_front();
2261            }
2262
2263            self.last_history_type = history_type;
2264            self.undo_stack.push_back(SemiRefCell::new(HistoryEntry {
2265                cursor_before: cursor_before.logical_pos,
2266                selection_before: self.selection,
2267                stats_before: self.stats,
2268                generation_before: self.buffer.generation(),
2269                cursor: cursor.logical_pos,
2270                deleted: Vec::new(),
2271                added: Vec::new(),
2272            }));
2273        }
2274
2275        self.active_edit_off = cursor.offset;
2276
2277        // If word-wrap is enabled, the visual layout of all logical lines affected by the write
2278        // may have changed. This includes even text before the insertion point up to the line
2279        // start, because this write may have joined with a word before the initial cursor.
2280        // See other uses of `word_wrap_cursor_next_line` in this function.
2281        if self.word_wrap_column > 0 {
2282            let safe_start = self.goto_line_start(cursor, cursor.logical_pos.y);
2283            let next_line = self.cursor_move_to_logical_internal(
2284                cursor,
2285                Point { x: 0, y: cursor.logical_pos.y + 1 },
2286            );
2287            self.active_edit_line_info = Some(ActiveEditLineInfo {
2288                safe_start,
2289                line_height_in_rows: next_line.visual_pos.y - safe_start.visual_pos.y,
2290                distance_next_line_start: next_line.offset - cursor.offset,
2291            });
2292        }
2293    }
2294
2295    /// Writes `text` into the buffer at the current cursor position.
2296    /// It records the change in the undo stack.
2297    fn edit_write(&mut self, text: &[u8]) {
2298        let logical_y_before = self.cursor.logical_pos.y;
2299
2300        // Copy the written portion into the undo entry.
2301        {
2302            let mut undo = self.undo_stack.back_mut().unwrap().borrow_mut();
2303            undo.added.extend_from_slice(text);
2304        }
2305
2306        // Write!
2307        self.buffer.replace(self.active_edit_off..self.active_edit_off, text);
2308
2309        // Move self.cursor to the end of the newly written text. Can't use `self.set_cursor_internal`,
2310        // because we're still in the progress of recalculating the line stats.
2311        self.active_edit_off += text.len();
2312        self.cursor = self.cursor_move_to_offset_internal(self.cursor, self.active_edit_off);
2313        self.stats.logical_lines += self.cursor.logical_pos.y - logical_y_before;
2314    }
2315
2316    /// Deletes the text between the current cursor position and `to`.
2317    /// It records the change in the undo stack.
2318    fn edit_delete(&mut self, to: Cursor) {
2319        debug_assert!(to.offset >= self.active_edit_off);
2320
2321        let logical_y_before = self.cursor.logical_pos.y;
2322        let off = self.active_edit_off;
2323        let mut out_off = usize::MAX;
2324
2325        let mut undo = self.undo_stack.back_mut().unwrap().borrow_mut();
2326        if self.cursor.logical_pos < undo.cursor {
2327            out_off = 0; // Prepend the deleted portion.
2328            undo.cursor = self.cursor.logical_pos; // Note the start of the deleted portion.
2329        }
2330
2331        // Copy the deleted portion into the undo entry.
2332        let deleted = &mut undo.deleted;
2333        self.buffer.extract_raw(off..to.offset, deleted, out_off);
2334
2335        // Delete the portion from the buffer by enlarging the gap.
2336        let count = to.offset - off;
2337        self.buffer.allocate_gap(off, 0, count);
2338
2339        self.stats.logical_lines += logical_y_before - to.logical_pos.y;
2340    }
2341
2342    /// Finalizes the current edit operation
2343    /// and recalculates the line statistics.
2344    fn edit_end(&mut self) {
2345        self.active_edit_depth -= 1;
2346        assert!(self.active_edit_depth >= 0);
2347        if self.active_edit_depth > 0 {
2348            return;
2349        }
2350
2351        #[cfg(debug_assertions)]
2352        {
2353            let entry = self.undo_stack.back_mut().unwrap().borrow_mut();
2354            debug_assert!(!entry.deleted.is_empty() || !entry.added.is_empty());
2355        }
2356
2357        if let Some(info) = self.active_edit_line_info.take() {
2358            let deleted_count = self.undo_stack.back_mut().unwrap().borrow_mut().deleted.len();
2359            let target = self.cursor.logical_pos;
2360
2361            // From our safe position we can measure the actual visual position of the cursor.
2362            self.set_cursor_internal(self.cursor_move_to_logical_internal(info.safe_start, target));
2363
2364            // If content is added at the insertion position, that's not a problem:
2365            // We can just remeasure the height of this one line and calculate the delta.
2366            // `deleted_count` is 0 in this case.
2367            //
2368            // The problem is when content is deleted, because it may affect lines
2369            // beyond the end of the `next_line`. In that case we have to measure
2370            // the entire buffer contents until the end to compute `self.stats.visual_lines`.
2371            if deleted_count < info.distance_next_line_start {
2372                // Now we can measure how many more visual rows this logical line spans.
2373                let next_line = self
2374                    .cursor_move_to_logical_internal(self.cursor, Point { x: 0, y: target.y + 1 });
2375                let lines_before = info.line_height_in_rows;
2376                let lines_after = next_line.visual_pos.y - info.safe_start.visual_pos.y;
2377                self.stats.visual_lines += lines_after - lines_before;
2378            } else {
2379                let end = self.cursor_move_to_logical_internal(self.cursor, Point::MAX);
2380                self.stats.visual_lines = end.visual_pos.y + 1;
2381            }
2382        } else {
2383            // If word-wrap is disabled the visual line count always matches the logical one.
2384            self.stats.visual_lines = self.stats.logical_lines;
2385        }
2386
2387        self.recalc_after_content_changed();
2388    }
2389
2390    /// Undo the last edit operation.
2391    pub fn undo(&mut self) {
2392        self.undo_redo(true);
2393    }
2394
2395    /// Redo the last undo operation.
2396    pub fn redo(&mut self) {
2397        self.undo_redo(false);
2398    }
2399
    /// Shared implementation of [`Self::undo`] and [`Self::redo`].
    ///
    /// With `undo == true` the newest entry of the undo stack is moved onto the redo stack
    /// and reverted; with `undo == false` the newest entry of the redo stack is moved onto
    /// the undo stack and reapplied. Returns without doing anything if the source stack
    /// has no entry to take.
    ///
    /// While applying the entry, its contents (added/deleted text, line stats, selection,
    /// buffer generation, cursor) are swapped with the current state, so that the very
    /// same entry can later be used for the opposite operation.
    fn undo_redo(&mut self, undo: bool) {
        // Transfer the last entry from the undo stack to the redo stack or vice versa.
        {
            let (from, to) = if undo {
                (&mut self.undo_stack, &mut self.redo_stack)
            } else {
                (&mut self.redo_stack, &mut self.undo_stack)
            };

            // Nothing to undo/redo if the source stack has no last element.
            let Some(list) = from.cursor_back_mut().remove_current_as_list() else {
                return;
            };

            to.cursor_back_mut().splice_after(list);
        }

        // The entry we just moved is now the last element of the destination stack.
        let change = {
            let to = if undo { &self.redo_stack } else { &self.undo_stack };
            to.back().unwrap()
        };

        // Move to the point where the modification took place.
        let cursor = self.cursor_move_to_logical_internal(self.cursor, change.borrow().cursor);

        let safe_cursor = if self.word_wrap_column > 0 {
            // If word-wrap is enabled, we need to move the cursor to the beginning of the line.
            // This is because the undo/redo operation may have changed the visual position of the cursor.
            self.goto_line_start(cursor, cursor.logical_pos.y)
        } else {
            cursor
        };

        {
            // Captured before the buffer is touched, so it can be stored in the entry below.
            let buffer_generation = self.buffer.generation();
            let mut change = change.borrow_mut();
            let change = &mut *change;

            // Undo: Whatever was deleted is now added and vice versa.
            mem::swap(&mut change.deleted, &mut change.added);

            // Delete the inserted portion.
            self.buffer.allocate_gap(cursor.offset, 0, change.deleted.len());

            // Reinsert the deleted portion.
            // This happens line by line: each line's newline is stripped and, if it had one,
            // rewritten as "\r\n" or "\n" depending on `self.newlines_are_crlf`.
            {
                let added = &change.added[..];
                let mut beg = 0;
                let mut offset = cursor.offset;

                while beg < added.len() {
                    // NOTE(review): presumably this advances past at most one line and
                    // `line` is non-zero if a newline was crossed; confirm against `simd::lines_fwd`.
                    let (end, line) = simd::lines_fwd(added, beg, 0, 1);
                    let has_newline = line != 0;
                    let link = &added[beg..end];
                    let line = unicode::strip_newline(link);
                    let mut written;

                    {
                        // The `+ 2` leaves room for the up to two newline bytes written below.
                        let gap = self.buffer.allocate_gap(offset, line.len() + 2, 0);
                        written = slice_copy_safe(gap, line);

                        if has_newline {
                            if self.newlines_are_crlf && written < gap.len() {
                                gap[written] = b'\r';
                                written += 1;
                            }
                            if written < gap.len() {
                                gap[written] = b'\n';
                                written += 1;
                            }
                        }

                        // Only the bytes that were actually written are committed.
                        self.buffer.commit_gap(written);
                    }

                    beg = end;
                    offset += written;
                }
            }

            // Restore the previous line statistics.
            mem::swap(&mut self.stats, &mut change.stats_before);

            // Restore the previous selection.
            mem::swap(&mut self.selection, &mut change.selection_before);

            // Pretend as if the buffer was never modified.
            self.buffer.set_generation(change.generation_before);
            change.generation_before = buffer_generation;

            // Restore the previous cursor.
            let cursor_before =
                self.cursor_move_to_logical_internal(safe_cursor, change.cursor_before);
            // `self.cursor` still holds the position from before this undo/redo,
            // which is what the opposite operation will have to restore.
            change.cursor_before = self.cursor.logical_pos;
            // Can't use `set_cursor_internal` here, because we haven't updated the line stats yet.
            self.cursor = cursor_before;

            if self.undo_stack.is_empty() {
                self.last_history_type = HistoryType::Other;
            }
        }

        self.recalc_after_content_changed();
    }
2503
2504    /// For interfacing with ICU.
2505    pub(crate) fn read_backward(&self, off: usize) -> &[u8] {
2506        self.buffer.read_backward(off)
2507    }
2508
2509    /// For interfacing with ICU.
2510    pub fn read_forward(&self, off: usize) -> &[u8] {
2511        self.buffer.read_forward(off)
2512    }
2513}
2514
/// The kinds of byte order mark (BOM), named after the encoding each one stands for.
///
/// Apart from `None`, the variants correspond one-to-one to the encodings
/// that `detect_bom` is able to report.
///
/// NOTE(review): nothing in the visible part of this file refers to this enum;
/// confirm where (and whether) it is used.
pub enum Bom {
    None,
    UTF8,
    UTF16LE,
    UTF16BE,
    UTF32LE,
    UTF32BE,
    GB18030,
}
2524
/// The length in bytes of the longest byte order mark that `detect_bom` recognizes.
const BOM_MAX_LEN: usize = 4;
2526
/// Checks whether `bytes` begins with a known byte order mark.
///
/// Returns the name of the encoding the mark stands for,
/// or `None` if `bytes` doesn't start with any of the known marks.
fn detect_bom(bytes: &[u8]) -> Option<&'static str> {
    // The order matters: The UTF-32LE mark starts with the UTF-16LE mark,
    // so the longer signatures have to be tried before the shorter ones.
    const SIGNATURES: [(&[u8], &str); 6] = [
        (b"\xFF\xFE\x00\x00", "UTF-32LE"),
        (b"\x00\x00\xFE\xFF", "UTF-32BE"),
        (b"\x84\x31\x95\x33", "GB18030"),
        (b"\xEF\xBB\xBF", "UTF-8"),
        (b"\xFF\xFE", "UTF-16LE"),
        (b"\xFE\xFF", "UTF-16BE"),
    ];

    // `starts_with` is false for inputs shorter than the signature,
    // so no separate length checks are needed.
    SIGNATURES.iter().find(|(signature, _)| bytes.starts_with(signature)).map(|&(_, name)| name)
}