use memchr::{memchr2, memchr2_iter};
use ropey::{Rope, RopeBuilder};
use std::cell::Cell;
use std::io;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering};
use std::sync::{mpsc, Arc, RwLock};
use std::thread;
use std::time::{Duration, Instant};
use super::storage::{FileStorage, StorageOpenError};
use crate::index::DiskLineIndex;
use crate::piece_tree::{editlog_path, Piece, PieceSource, PieceTree, SessionMeta};
mod commands;
mod compaction;
mod editing;
mod lifecycle;
mod persistence;
mod positions;
mod reads;
mod search;
mod state;
mod types;
pub use crate::piece_tree::FragmentationStats;
pub use compaction::{
CompactionPolicy, CompactionRecommendation, CompactionUrgency, IdleCompactionOutcome,
};
#[cfg(feature = "editor")]
pub(crate) use lifecycle::OpenProgressPhase;
#[cfg(feature = "editor")]
pub(crate) use persistence::{PreparedSave, SaveCompletion};
pub use search::{LiteralSearchIter, LiteralSearchQuery};
pub use types::{
ByteProgress, CutResult, DocumentBacking, DocumentEncoding, DocumentEncodingErrorKind,
DocumentEncodingOrigin, DocumentError, DocumentMaintenanceStatus, DocumentOpenOptions,
DocumentSaveOptions, DocumentStatus, EditCapability, EditResult, LineCount, LineSlice,
MaintenanceAction, OpenEncodingPolicy, SaveEncodingPolicy, SearchMatch, TextPosition,
TextRange, TextSelection, TextSlice, Viewport, ViewportRequest, ViewportRow,
};
// Size limits and tuning constants for the document backends.
// NOTE(review): several of these are not referenced in this part of the file; their
// exact role is inferred from the names and should be confirmed at the use sites.
//
// Index sizing: byte limits of 2 GiB, 1 GiB, 128 MiB, 8 MiB and 4 MiB, plus the default
// average line length (50 bytes). `MAX_LINE_OFFSETS_BYTES` bounds the capacity reserved by
// `LineOffsets::capacity_for`; `AVG_LINE_LEN_ESTIMATE` is the fallback line-length estimate.
const FULL_INDEX_MAX_FILE_BYTES: usize = 2 * 1024 * 1024 * 1024; const MAX_INDEXED_BYTES: usize = 1024 * 1024 * 1024; const MAX_LINE_OFFSETS_BYTES: usize = 128 * 1024 * 1024; const INLINE_FULL_INDEX_MAX_FILE_BYTES: usize = 8 * 1024 * 1024; const INDEXER_YIELD_EVERY_BYTES: usize = 4 * 1024 * 1024; const AVG_LINE_LEN_ESTIMATE: usize = 50;
// `AVG_LINE_LEN_SAMPLE_BYTES` (256 KiB) is the window size sampled by `estimate_avg_line_len`.
// The other two are 1 MiB and 16,384 characters.
const AVG_LINE_LEN_SAMPLE_BYTES: usize = 256 * 1024; const PIECE_TABLE_MIN_BYTES: usize = 1024 * 1024; const MAX_LINE_SCAN_CHARS: usize = 16_384;
// Line-count limits (4,000,000 and 4,096 lines); the partial piece-table maximum is
// defined to equal `LINE_LENGTHS_MAX_SYNC_LINES`.
const LINE_LENGTHS_MAX_SYNC_LINES: usize = 4_000_000;
const PARTIAL_PIECE_TABLE_TARGET_LINES: usize = 4_096;
const PARTIAL_PIECE_TABLE_MAX_LINES: usize = LINE_LENGTHS_MAX_SYNC_LINES;
// Scan windows: 16 MiB, 64 KiB and 256 KiB.
const PARTIAL_PIECE_TABLE_SCAN_BYTES: usize = 16 * 1024 * 1024; const APPROX_LINE_BACKTRACK_BYTES: usize = 64 * 1024;
const APPROX_LINE_FORWARD_BYTES: usize = 256 * 1024;
// Byte limits of 1 MiB, 1 MiB, 8 MiB, 128 MiB, 64 MiB and 64 KiB respectively.
const TAIL_FAST_PATH_MAX_BACKSCAN_BYTES: usize = 1024 * 1024; const FALLBACK_NEXT_LINE_SCAN_BYTES: usize = 1024 * 1024; const SAVE_STREAM_CHUNK_BYTES: usize = 8 * 1024 * 1024; const MAX_ROPE_EDIT_FILE_BYTES: usize = 128 * 1024 * 1024; const FULL_SYNC_PIECE_TABLE_MAX_FILE_BYTES: usize = 64 * 1024 * 1024; const PIECE_TREE_TARGET_BYTES: usize = 64 * 1024;
const PIECE_TREE_TARGET_LINES: usize = 512;
const PIECE_TREE_DISK_MIN_BYTES: usize = PIECE_TABLE_MIN_BYTES;
// Session persistence throttling, consumed by `PieceTable::flush_session_inner`: a
// non-forced flush is skipped while the previous flush is younger than the debounce
// interval, unless at least `PIECE_SESSION_FORCE_AFTER_EDITS` edits are pending.
const PIECE_SESSION_FLUSH_DEBOUNCE: Duration = Duration::from_millis(250);
const PIECE_SESSION_FORCE_AFTER_EDITS: usize = 32;
/// Line terminator convention used by a document.
///
/// `Lf` is the default variant.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
pub enum LineEnding {
    /// A single line feed (`\n`).
    #[default]
    Lf,
    /// Carriage return followed by line feed (`\r\n`).
    Crlf,
    /// A lone carriage return (`\r`).
    Cr,
}

impl LineEnding {
    /// Returns the literal terminator text for this line ending.
    pub fn as_str(self) -> &'static str {
        match self {
            LineEnding::Cr => "\r",
            LineEnding::Crlf => "\r\n",
            LineEnding::Lf => "\n",
        }
    }
}
/// Detects the line-ending convention from the first line break found in `bytes`.
///
/// Returns `LineEnding::Lf` when `bytes` contains neither `\n` nor `\r`.
fn detect_line_ending(bytes: &[u8]) -> LineEnding {
    let Some(pos) = memchr2(b'\n', b'\r', bytes) else {
        return LineEnding::Lf;
    };
    // `pos` is the first `\n` or `\r` in the buffer, so a `\n` found here can never be
    // preceded by `\r`; only a `\r` needs a look-ahead to tell CRLF from a lone CR.
    if bytes[pos] == b'\n' {
        LineEnding::Lf
    } else if bytes.get(pos + 1) == Some(&b'\n') {
        LineEnding::Crlf
    } else {
        LineEnding::Cr
    }
}
fn detect_line_ending_text(text: &str) -> LineEnding {
let bytes = text.as_bytes();
let Some(pos) = memchr2(b'\n', b'\r', bytes) else {
return LineEnding::Lf;
};
match bytes[pos] {
b'\n' if pos > 0 && bytes[pos - 1] == b'\r' => LineEnding::Crlf,
b'\n' => LineEnding::Lf,
b'\r' if pos + 1 < bytes.len() && bytes[pos + 1] == b'\n' => LineEnding::Crlf,
b'\r' => LineEnding::Cr,
_ => LineEnding::Lf,
}
}
/// Prepares `text` for insertion into a document.
///
/// The result starts with `virtual_padding_cols` spaces, followed by `text` with every
/// `\r\n`, lone `\r`, and `\n` rewritten to `line_ending`.
///
/// Returns `(normalized, added_lines, last_col)`, where `added_lines` is the number of
/// line breaks emitted and `last_col` is the number of characters of `text` after the
/// last break (padding spaces are not counted).
fn normalize_insert_text(
    text: &str,
    virtual_padding_cols: usize,
    line_ending: LineEnding,
) -> (String, usize, usize) {
    let newline = line_ending.as_str();
    let capacity = text
        .len()
        .saturating_add(virtual_padding_cols)
        .saturating_add(text.matches('\n').count());
    let mut normalized = String::with_capacity(capacity);
    normalized.extend(std::iter::repeat_n(' ', virtual_padding_cols));

    let mut added_lines = 0usize;
    let mut last_col = 0usize;
    // True while the previous character was `\r`, so the `\n` of a CRLF pair is skipped.
    let mut after_cr = false;
    for ch in text.chars() {
        let follows_cr = std::mem::replace(&mut after_cr, ch == '\r');
        match ch {
            '\n' if follows_cr => {}
            '\r' | '\n' => {
                normalized.push_str(newline);
                added_lines += 1;
                last_col = 0;
            }
            other => {
                normalized.push(other);
                last_col += 1;
            }
        }
    }
    (normalized, added_lines, last_col)
}
/// Builds a rope from raw bytes decoded as UTF-8, normalizing line breaks to `\n`.
///
/// The input is decoded incrementally through an `encoding_rs` UTF-8 decoder into a
/// fixed 8 KiB scratch buffer. Each decoded chunk has `\r\n` and lone `\r` rewritten to
/// `\n` before it is appended to the rope builder.
///
/// Returns the rope and a flag that is `true` if the decoder reported errors for any
/// chunk. Empty input yields an empty rope and `false`.
fn build_rope_from_bytes(bytes: &[u8]) -> (Rope, bool) {
if bytes.is_empty() {
return (Rope::new(), false);
}
let mut builder = RopeBuilder::new();
let mut decoder = encoding_rs::UTF_8.new_decoder();
let mut input = bytes;
let mut out = [0u8; 8192];
// Carried across chunks so a CRLF pair split over two decode calls still collapses.
let mut prev_was_cr = false;
let mut had_errors = false;
loop {
// Once all input is consumed, one more call with `last == true` lets the decoder finish.
let last = input.is_empty();
let (result, read, written, chunk_had_errors) =
decoder.decode_to_utf8(input, &mut out, last);
had_errors |= chunk_had_errors;
if written > 0 {
if let Ok(s) = std::str::from_utf8(&out[..written]) {
if !s.is_empty() {
let mut normalized = String::with_capacity(s.len());
for ch in s.chars() {
if prev_was_cr {
prev_was_cr = false;
// The `\n` of a CRLF pair: the `\r` already produced a `\n`.
if ch == '\n' {
continue;
}
}
if ch == '\r' {
normalized.push('\n');
prev_was_cr = true;
} else {
normalized.push(ch);
}
}
if !normalized.is_empty() {
builder.append(&normalized);
}
}
}
}
input = &input[read..];
match result {
encoding_rs::CoderResult::InputEmpty => {
if last {
break;
}
}
// Scratch buffer filled up: loop again to drain the remaining input.
encoding_rs::CoderResult::OutputFull => {}
}
}
(builder.finish(), had_errors)
}
/// Returns a copy of `text` in which every `\r\n` pair and every lone `\r` is replaced
/// by a single `\n`.
fn normalize_decoded_text(text: &str) -> String {
    let mut out = String::with_capacity(text.len());
    // True while the previous character was `\r`, so a directly following `\n` is dropped.
    let mut after_cr = false;
    for ch in text.chars() {
        let follows_cr = std::mem::replace(&mut after_cr, ch == '\r');
        match ch {
            '\r' => out.push('\n'),
            '\n' if follows_cr => {}
            other => out.push(other),
        }
    }
    out
}
/// Builds a rope from already-decoded text after normalizing its line breaks with
/// `normalize_decoded_text`. Empty input yields an empty rope.
fn build_rope_from_decoded_text(text: &str) -> Rope {
    match text {
        "" => Rope::new(),
        non_empty => {
            let mut builder = RopeBuilder::new();
            builder.append(&normalize_decoded_text(non_empty));
            builder.finish()
        }
    }
}
/// Renders the rope to a `String`, writing each `\n` in the rope as `line_ending`.
///
/// For `LineEnding::Lf` the rope text is returned unchanged.
fn rope_text_with_line_endings(rope: &Rope, line_ending: LineEnding) -> String {
    if matches!(line_ending, LineEnding::Lf) {
        return rope.to_string();
    }
    let newline = line_ending.as_str();
    // Reserve room for the extra bytes each break gains over the single `\n` in the rope.
    let line_breaks = rope.len_lines().saturating_sub(1);
    let growth = line_breaks.saturating_mul(newline.len().saturating_sub(1));
    let mut rendered = String::with_capacity(rope.len_bytes().saturating_add(growth));
    for chunk in rope.chunks() {
        let mut runs = chunk.split('\n');
        if let Some(first) = runs.next() {
            rendered.push_str(first);
        }
        // Every further run was preceded by exactly one `\n` in the chunk.
        for run in runs {
            rendered.push_str(newline);
            rendered.push_str(run);
        }
    }
    rendered
}
/// Decodes `bytes` with `encoding` via `decode_with_bom_removal`.
///
/// Returns the decoded text as an owned `String` together with the decoder's error flag.
fn decode_text_with_encoding(bytes: &[u8], encoding: DocumentEncoding) -> (String, bool) {
    let codec = encoding.as_encoding();
    let (text, had_errors) = codec.decode_with_bom_removal(bytes);
    (text.into_owned(), had_errors)
}
/// Encodes `text` into `encoding`.
///
/// # Errors
/// - `UnsupportedSaveTarget` when `encoding.can_roundtrip_save()` is false.
/// - `RedirectedSaveTarget` when the encoder reports a different output encoding than
///   the one requested.
/// - `UnrepresentableText` when the encoder reports errors.
fn encode_text_with_encoding(
    text: &str,
    encoding: DocumentEncoding,
) -> Result<Vec<u8>, DocumentEncodingErrorKind> {
    if !encoding.can_roundtrip_save() {
        return Err(DocumentEncodingErrorKind::UnsupportedSaveTarget);
    }
    let (encoded, actual_encoding, had_errors) = encoding.as_encoding().encode(text);
    if actual_encoding != encoding.as_encoding() {
        let actual = DocumentEncoding::from_encoding_rs(actual_encoding);
        return Err(DocumentEncodingErrorKind::RedirectedSaveTarget { actual });
    }
    if had_errors {
        Err(DocumentEncodingErrorKind::UnrepresentableText)
    } else {
        Ok(encoded.into_owned())
    }
}
/// Computes the byte length of the rope when written with `line_ending`.
///
/// The rope stores one byte per line break; only `Crlf` adds an extra byte per break.
fn rope_save_len_bytes(rope: &Rope, line_ending: LineEnding) -> usize {
    let line_breaks = rope.len_lines().saturating_sub(1);
    let extra_bytes = if line_ending == LineEnding::Crlf {
        line_breaks
    } else {
        0
    };
    rope.len_bytes().saturating_add(extra_bytes)
}
fn line_lengths_from_offsets(offsets: &LineOffsets, file_len: usize) -> Vec<usize> {
let len = offsets.len().max(1);
let mut lengths = Vec::with_capacity(len);
match offsets {
LineOffsets::U32(v) => {
for i in 0..v.len() {
let start = v[i] as usize;
let end = v
.get(i + 1)
.copied()
.map(|v| v as usize)
.unwrap_or(file_len);
lengths.push(end.saturating_sub(start));
}
}
LineOffsets::U64(v) => {
for i in 0..v.len() {
let start = v[i] as usize;
let end = v
.get(i + 1)
.copied()
.map(|v| v as usize)
.unwrap_or(file_len);
lengths.push(end.saturating_sub(start));
}
}
}
if lengths.is_empty() {
lengths.push(file_len);
}
lengths
}
/// Returns the byte lengths of up to `max_lines` leading lines whose end offset is
/// already present in `offsets`.
///
/// The final entry of `offsets` only marks the start of a line with no known end, so it
/// never contributes a length.
fn prefix_line_lengths_from_offsets(offsets: &LineOffsets, max_lines: usize) -> Vec<usize> {
    // Each adjacent pair of offsets delimits exactly one complete line.
    match offsets {
        LineOffsets::U32(starts) => starts
            .windows(2)
            .take(max_lines)
            .map(|pair| (pair[1] as usize).saturating_sub(pair[0] as usize))
            .collect(),
        LineOffsets::U64(starts) => starts
            .windows(2)
            .take(max_lines)
            .map(|pair| (pair[1] as usize).saturating_sub(pair[0] as usize))
            .collect(),
    }
}
/// Splits `bytes` into lines and returns the byte length of each, terminators included.
///
/// `\n`, `\r\n` and lone `\r` all end a line; the text after the last terminator is
/// reported as a final line, possibly of length zero. Empty input yields `Some(vec![0])`.
///
/// Returns `None` as soon as the result would need more than `max_lines` entries.
fn line_lengths_from_bytes(bytes: &[u8], max_lines: usize) -> Option<Vec<usize>> {
    if bytes.is_empty() {
        return Some(vec![0]);
    }
    let estimate = (bytes.len() / AVG_LINE_LEN_ESTIMATE).saturating_add(2);
    let mut lengths = Vec::with_capacity(estimate.min(max_lines.max(1)));
    let mut line_start = 0usize;
    // A `\r` directly followed by `\n` is not a terminator itself; the `\n` ends the line.
    let terminators = memchr2_iter(b'\n', b'\r', bytes)
        .filter(|&at| !(bytes[at] == b'\r' && bytes.get(at + 1) == Some(&b'\n')));
    for at in terminators {
        if lengths.len() >= max_lines {
            return None;
        }
        let next_line = at + 1;
        lengths.push(next_line.saturating_sub(line_start));
        line_start = next_line;
    }
    if lengths.len() >= max_lines {
        return None;
    }
    lengths.push(bytes.len().saturating_sub(line_start));
    Some(lengths)
}
/// Scans forward from byte `start` and returns the lengths (terminators included) of up
/// to `max_lines` lines, looking at no more than `max_bytes` bytes.
///
/// An unterminated final line is reported only when the scan window reaches the end of
/// `bytes`. Returns an empty vector when `max_lines` is zero or `start` is out of range.
fn scan_line_lengths_from(
    bytes: &[u8],
    start: usize,
    max_lines: usize,
    max_bytes: usize,
) -> Vec<usize> {
    if max_lines == 0 || start >= bytes.len() {
        return Vec::new();
    }
    let end = start.saturating_add(max_bytes).min(bytes.len());
    let window = &bytes[start..end];
    let mut lengths = Vec::with_capacity(max_lines.min(256));
    // Window-relative position where the current line begins.
    let mut line_start = 0usize;
    // A `\r` is skipped when the byte after it is `\n`, even if that `\n` lies just past
    // the window, so a CRLF pair is never split into two lines.
    let terminators = memchr2_iter(b'\n', b'\r', window)
        .filter(|&rel| window[rel] != b'\r' || bytes.get(start + rel + 1) != Some(&b'\n'));
    for rel in terminators {
        let next_line = rel + 1;
        lengths.push(next_line.saturating_sub(line_start));
        line_start = next_line;
        if lengths.len() >= max_lines {
            return lengths;
        }
    }
    if end == bytes.len() && lengths.len() < max_lines {
        lengths.push(window.len().saturating_sub(line_start));
    }
    lengths
}
/// Counts line breaks in `bytes`, treating `\r\n` as a single break and both lone `\r`
/// and lone `\n` as one break each.
fn count_line_breaks_in_bytes(bytes: &[u8]) -> usize {
    // Every `\r` opens a break; a `\n` adds one only when it does not complete a CRLF pair.
    let mut previous = 0u8;
    bytes
        .iter()
        .filter(|&&byte| {
            let is_break = byte == b'\r' || (byte == b'\n' && previous != b'\r');
            previous = byte;
            is_break
        })
        .count()
}
/// Table of line start offsets, stored as `u32` or `u64` entries.
#[derive(Debug)]
pub(crate) enum LineOffsets {
    /// 32-bit offsets.
    U32(Vec<u32>),
    /// 64-bit offsets.
    U64(Vec<u64>),
}

impl Default for LineOffsets {
    /// A `u32` table holding only the start of the first line (offset 0).
    fn default() -> Self {
        LineOffsets::U32(vec![0])
    }
}

impl LineOffsets {
    /// Creates a table seeded with offset 0, using `u32` entries when `file_len` fits in
    /// a `u32` and `u64` entries otherwise. Capacity is reserved per `capacity_for`.
    pub(crate) fn new_for_file_len(file_len: usize) -> Self {
        if file_len > u32::MAX as usize {
            let mut wide = Vec::with_capacity(Self::capacity_for::<u64>(file_len));
            wide.push(0u64);
            return Self::U64(wide);
        }
        let mut narrow = Vec::with_capacity(Self::capacity_for::<u32>(file_len));
        narrow.push(0u32);
        Self::U32(narrow)
    }

    /// Number of offsets stored.
    pub(crate) fn len(&self) -> usize {
        match self {
            Self::U32(entries) => entries.len(),
            Self::U64(entries) => entries.len(),
        }
    }

    /// Returns the offset at `idx` widened to `usize`, or `None` when out of range.
    pub(crate) fn get_usize(&self, idx: usize) -> Option<usize> {
        match self {
            Self::U32(entries) => entries.get(idx).map(|&offset| offset as usize),
            Self::U64(entries) => entries.get(idx).map(|&offset| offset as usize),
        }
    }

    /// Initial capacity (in entries of type `T`) for a file of `file_len` bytes: the
    /// estimated line count, capped so the reservation stays within
    /// `MAX_LINE_OFFSETS_BYTES`, and never less than one.
    fn capacity_for<T>(file_len: usize) -> usize {
        let ceiling = (MAX_LINE_OFFSETS_BYTES / std::mem::size_of::<T>()).max(1);
        let estimated_lines = match file_len {
            0 => 1,
            len => (len / AVG_LINE_LEN_ESTIMATE).saturating_add(2),
        };
        estimated_lines.clamp(1, ceiling)
    }
}
/// Result of the single-pass scan performed by `analyze_inline_open`.
#[derive(Debug)]
struct InlineOpenAnalysis {
// Start offset of every line; always begins with 0.
line_offsets: LineOffsets,
// Convention of the first line break seen; `Lf` when there is none.
line_ending: LineEnding,
// File length divided by the number of lines, rounded up and at least 1
// (`AVG_LINE_LEN_ESTIMATE` for an empty file).
avg_line_len: usize,
// True when the bytes are not valid UTF-8.
utf8_had_errors: bool,
}
fn analyze_inline_open(bytes: &[u8]) -> InlineOpenAnalysis {
let file_len = bytes.len();
let avg_line_len = |line_breaks: usize| {
if file_len == 0 {
AVG_LINE_LEN_ESTIMATE
} else {
file_len.div_ceil(line_breaks.saturating_add(1)).max(1)
}
};
let mut detected_line_ending = None;
if file_len <= u32::MAX as usize {
let mut offsets = Vec::with_capacity(LineOffsets::capacity_for::<u32>(file_len));
offsets.push(0);
let mut line_breaks = 0usize;
for pos in memchr2_iter(b'\n', b'\r', bytes) {
match bytes[pos] {
b'\r' if pos + 1 < bytes.len() && bytes[pos + 1] == b'\n' => {
detected_line_ending.get_or_insert(LineEnding::Crlf);
continue;
}
b'\n' if pos > 0 && bytes[pos - 1] == b'\r' => {}
b'\n' => {
detected_line_ending.get_or_insert(LineEnding::Lf);
}
b'\r' => {
detected_line_ending.get_or_insert(LineEnding::Cr);
}
_ => continue,
}
offsets.push((pos + 1) as u32);
line_breaks += 1;
}
return InlineOpenAnalysis {
line_offsets: LineOffsets::U32(offsets),
line_ending: detected_line_ending.unwrap_or(LineEnding::Lf),
avg_line_len: avg_line_len(line_breaks),
utf8_had_errors: std::str::from_utf8(bytes).is_err(),
};
}
let mut offsets = Vec::with_capacity(LineOffsets::capacity_for::<u64>(file_len));
offsets.push(0);
let mut line_breaks = 0usize;
for pos in memchr2_iter(b'\n', b'\r', bytes) {
match bytes[pos] {
b'\r' if pos + 1 < bytes.len() && bytes[pos + 1] == b'\n' => {
detected_line_ending.get_or_insert(LineEnding::Crlf);
continue;
}
b'\n' if pos > 0 && bytes[pos - 1] == b'\r' => {}
b'\n' => {
detected_line_ending.get_or_insert(LineEnding::Lf);
}
b'\r' => {
detected_line_ending.get_or_insert(LineEnding::Cr);
}
_ => continue,
}
offsets.push((pos + 1) as u64);
line_breaks += 1;
}
InlineOpenAnalysis {
line_offsets: LineOffsets::U64(offsets),
line_ending: detected_line_ending.unwrap_or(LineEnding::Lf),
avg_line_len: avg_line_len(line_breaks),
utf8_had_errors: std::str::from_utf8(bytes).is_err(),
}
}
/// Estimates the average line length of `bytes` by sampling windows of up to
/// `AVG_LINE_LEN_SAMPLE_BYTES` bytes.
///
/// The start of the buffer is always sampled. Larger buffers also sample the tail, and
/// buffers more than twice the window size add windows around the quarter, half and
/// three-quarter marks. Returns `AVG_LINE_LEN_ESTIMATE` for empty input; the result is
/// never zero.
fn estimate_avg_line_len(bytes: &[u8]) -> usize {
    let len = bytes.len();
    if len == 0 {
        return AVG_LINE_LEN_ESTIMATE;
    }
    let sample = AVG_LINE_LEN_SAMPLE_BYTES.min(len);
    let last_start = len.saturating_sub(sample);

    let mut starts = vec![0usize];
    if len > sample {
        starts.push(last_start);
    }
    if len > sample * 2 {
        starts.extend([
            len / 4,
            len / 2 - sample / 2,
            (len * 3 / 4).saturating_sub(sample / 2),
        ]);
    }
    starts.sort_unstable();
    starts.dedup();

    let mut total_bytes = 0usize;
    let mut total_lines = 0usize;
    for start in starts.into_iter().map(|start| start.min(last_start)) {
        let end = (start + sample).min(len);
        if end <= start {
            continue;
        }
        let window = &bytes[start..end];
        // A `\r` directly followed by `\n` is skipped so CRLF counts as one break.
        let breaks = memchr2_iter(b'\n', b'\r', window)
            .filter(|&rel| {
                let at = start + rel;
                !(bytes[at] == b'\r' && bytes.get(at + 1) == Some(&b'\n'))
            })
            .count();
        total_bytes = total_bytes.saturating_add(window.len());
        total_lines = total_lines.saturating_add(breaks + 1);
    }

    if total_lines == 0 {
        AVG_LINE_LEN_ESTIMATE
    } else {
        total_bytes.div_ceil(total_lines).max(1)
    }
}
/// Returns the encoded length implied by the lead byte `first` of a UTF-8 sequence.
///
/// Bytes that cannot start a multi-byte sequence report a length of 1 so callers step
/// over them one byte at a time. That covers continuation bytes (`0x80..=0xBF`), the
/// overlong leads `0xC0`/`0xC1`, and `0xF8..=0xFF`. Previously continuation bytes and
/// `0xC0`/`0xC1` were reported as 2-byte leads, which let two stray bytes
/// (e.g. `0x93 0x94`) be consumed as a single character.
fn utf8_char_len(first: u8) -> usize {
    match first {
        0x00..=0x7F => 1,
        0xC2..=0xDF => 2,
        0xE0..=0xEF => 3,
        // 0xF5..=0xF7 keep their historical width of 4; they are rejected later by the
        // well-formedness check.
        0xF0..=0xF7 => 4,
        _ => 1,
    }
}
/// Returns how many bytes to advance from `start` to step over one text unit, without
/// going past `end`.
///
/// The result is 0 when `start >= end`, the full encoded width when the bytes at `start`
/// pass `utf8_char_is_well_formed` for that width, and 1 otherwise.
#[inline]
fn utf8_step(bytes: &[u8], start: usize, end: usize) -> usize {
    if end <= start {
        return 0;
    }
    let available = end - start;
    match utf8_char_len(bytes[start]).min(available) {
        width if width > 1 && utf8_char_is_well_formed(bytes, start, width) => width,
        _ => 1,
    }
}
/// Returns `true` when `b` has the `10xxxxxx` bit pattern of a UTF-8 continuation byte.
#[inline]
fn is_utf8_continuation(b: u8) -> bool {
    matches!(b, 0x80..=0xBF)
}
/// Checks whether the `width` bytes starting at `start` look like one encoded character.
///
/// Returns `false` when the range runs past the end of `bytes` or `width` is not 1..=4.
/// A width of 1 must be ASCII. A width of 2 only checks that the second byte is a
/// continuation byte. Widths 3 and 4 also restrict the second byte according to the lead
/// byte, which rejects overlong forms, surrogates and values above U+10FFFF.
#[inline]
fn utf8_char_is_well_formed(bytes: &[u8], start: usize, width: usize) -> bool {
    let Some(unit) = bytes.get(start..start.saturating_add(width)) else {
        return false;
    };
    match *unit {
        [only] => only < 0x80,
        [_, second] => is_utf8_continuation(second),
        [lead, second, third] => {
            let second_ok = match lead {
                0xE0 => matches!(second, 0xA0..=0xBF),
                0xED => matches!(second, 0x80..=0x9F),
                0xE1..=0xEC | 0xEE..=0xEF => is_utf8_continuation(second),
                _ => false,
            };
            second_ok && is_utf8_continuation(third)
        }
        [lead, second, third, fourth] => {
            let second_ok = match lead {
                0xF0 => matches!(second, 0x90..=0xBF),
                0xF4 => matches!(second, 0x80..=0x8F),
                0xF1..=0xF3 => is_utf8_continuation(second),
                _ => false,
            };
            second_ok && is_utf8_continuation(third) && is_utf8_continuation(fourth)
        }
        _ => false,
    }
}
/// Counts text columns at the start of `bytes`, stopping at the first `\n` or `\r`, at
/// the end of the buffer, or once `max_cols` columns have been counted.
#[inline]
fn count_text_columns(bytes: &[u8], max_cols: usize) -> usize {
    let mut columns = 0usize;
    let mut pos = 0usize;
    while columns < max_cols {
        match bytes.get(pos).copied() {
            None | Some(b'\n' | b'\r') => break,
            Some(_) => {
                pos += utf8_step(bytes, pos, bytes.len());
                columns += 1;
            }
        }
    }
    columns
}
/// Counts text columns at the start of `bytes` up to the first `\n` or `\r`, or to the
/// end of the buffer, with no column limit.
#[inline]
fn count_text_columns_exact(bytes: &[u8]) -> usize {
    let mut columns = 0usize;
    let mut pos = 0usize;
    loop {
        match bytes.get(pos).copied() {
            None | Some(b'\n' | b'\r') => return columns,
            Some(_) => {
                pos += utf8_step(bytes, pos, bytes.len());
                columns += 1;
            }
        }
    }
}
/// Running state for converting a text-unit index into a `(line, column)` position
/// while scanning a document piece by piece.
#[derive(Debug, Clone, Copy)]
struct CursorScanState {
    /// Number of text units to consume before stopping.
    target: usize,
    /// Text units consumed so far.
    seen: usize,
    /// Zero-based line reached so far.
    line0: usize,
    /// Zero-based column reached on `line0`.
    col0: usize,
    /// Whether the last consumed unit was `\r` (so a following `\n` completes a CRLF).
    prev_was_cr: bool,
}

impl CursorScanState {
    /// Starts a scan at line 0, column 0 that stops after `target` units.
    fn new(target: usize) -> Self {
        CursorScanState {
            target,
            seen: 0,
            line0: 0,
            col0: 0,
            prev_was_cr: false,
        }
    }

    /// True once at least `target` units have been consumed.
    fn is_done(self) -> bool {
        self.target <= self.seen
    }

    /// The `(line0, col0)` position reached so far.
    fn position(self) -> (usize, usize) {
        let CursorScanState { line0, col0, .. } = self;
        (line0, col0)
    }
}
/// Advances `state` over `bytes` until the state's target unit count is reached or the
/// bytes run out.
///
/// Every `\r`, every `\n`, and every other character counts as one unit. A `\r` starts
/// a new line; a `\n` starts a new line unless it directly follows a `\r`. Any line
/// break resets the column to 0; other characters advance it by one. Because the CR
/// flag lives in `state`, a CRLF pair split across two calls is still one line break.
fn scan_cursor_position_bytes(bytes: &[u8], state: &mut CursorScanState) {
    let mut pos = 0usize;
    while pos < bytes.len() {
        if state.is_done() {
            break;
        }
        let byte = bytes[pos];
        state.seen = state.seen.saturating_add(1);
        let advance = match byte {
            b'\r' | b'\n' => {
                let completes_crlf = byte == b'\n' && state.prev_was_cr;
                if !completes_crlf {
                    state.line0 = state.line0.saturating_add(1);
                }
                state.col0 = 0;
                state.prev_was_cr = byte == b'\r';
                1
            }
            _ => {
                state.prev_was_cr = false;
                state.col0 = state.col0.saturating_add(1);
                utf8_step(bytes, pos, bytes.len())
            }
        };
        pos += advance;
    }
}
/// Moves `offset` back to the nearest UTF-8 character boundary at or before it.
///
/// `offset` is first clamped to the buffer length. It is returned as-is when it sits at
/// either end of the buffer or when `bytes` is not valid UTF-8.
fn align_utf8_boundary_backward(bytes: &[u8], offset: usize) -> usize {
    let clamped = offset.min(bytes.len());
    if clamped == 0 || clamped == bytes.len() {
        return clamped;
    }
    match std::str::from_utf8(bytes) {
        // Index 0 is always a boundary, so the search cannot come up empty.
        Ok(text) => (0..=clamped)
            .rev()
            .find(|&candidate| text.is_char_boundary(candidate))
            .unwrap_or(0),
        Err(_) => clamped,
    }
}
/// Moves `offset` forward to the nearest UTF-8 character boundary at or after it.
///
/// `offset` is first clamped to the buffer length. It is returned as-is when it sits at
/// either end of the buffer or when `bytes` is not valid UTF-8.
fn align_utf8_boundary_forward(bytes: &[u8], offset: usize) -> usize {
    let total = bytes.len();
    let clamped = offset.min(total);
    if clamped == 0 || clamped == total {
        return clamped;
    }
    match std::str::from_utf8(bytes) {
        // The end of the text is always a boundary, so the search cannot come up empty.
        Ok(text) => (clamped..=total)
            .find(|&candidate| text.is_char_boundary(candidate))
            .unwrap_or(total),
        Err(_) => clamped,
    }
}
/// Looks up the byte range `(start, end)` of line `line0` in the offsets table.
///
/// Both ends are clamped to `file_len` and `end` is never less than `start`. When the
/// next line's offset is missing, the line extends to `file_len` if `indexing_complete`
/// is set; otherwise the range is unknown.
///
/// Returns `None` when there is no table, `line0` has no recorded start, or the end of
/// the line is not known yet.
fn mmap_line_byte_range(
    offsets: Option<&LineOffsets>,
    file_len: usize,
    line0: usize,
    indexing_complete: bool,
) -> Option<(usize, usize)> {
    let table = offsets?;
    let begin = table.get_usize(line0)?.min(file_len);
    let finish = table
        .get_usize(line0.saturating_add(1))
        .map(|next_start| next_start.min(file_len))
        .or(indexing_complete.then_some(file_len))?;
    Some((begin, finish.max(begin)))
}
/// Converts a zero-based column on a line into an absolute byte offset.
///
/// `line_range` is the `(start, end)` byte range of the line within `bytes`. The walk
/// stops early at a `\n` or `\r`, so columns past the line's text map to the position
/// of its line break (or `end`). An empty or inverted range yields `min(start, end)`.
fn byte_offset_for_text_col_in_bytes(
    bytes: &[u8],
    line_range: (usize, usize),
    col0: usize,
) -> usize {
    let (start, end) = line_range;
    if col0 == 0 || start >= end {
        return start.min(end);
    }
    let mut cursor = start;
    for _ in 0..col0 {
        if cursor >= end || matches!(bytes[cursor], b'\n' | b'\r') {
            break;
        }
        cursor += utf8_step(bytes, cursor, end);
    }
    cursor.min(end)
}
/// Advances from byte offset `start` by `text_units` units and returns the new offset.
///
/// One unit is a single character or a single line break; a `\r\n` pair is consumed as
/// one unit. `start` is clamped to `file_len`, and the walk never goes past `file_len`.
fn advance_offset_by_text_units_in_bytes(
    bytes: &[u8],
    file_len: usize,
    start: usize,
    text_units: usize,
) -> usize {
    let mut offset = start.min(file_len);
    let mut remaining = text_units;
    while remaining > 0 && offset < file_len {
        remaining -= 1;
        match bytes[offset] {
            b'\r' => {
                offset += 1;
                // The `\n` of a CRLF pair belongs to the unit just consumed.
                if offset < file_len && bytes[offset] == b'\n' {
                    offset += 1;
                }
            }
            b'\n' => offset += 1,
            _ => offset += utf8_step(bytes, offset, file_len),
        }
    }
    offset.min(file_len)
}
/// A batch of line offsets in either 32-bit or 64-bit form, mirroring the two storage
/// widths of `LineOffsets`.
// NOTE(review): not used in this part of the file; presumably how an indexer hands
// offsets over in batches — confirm at the use site.
#[derive(Debug)]
enum OffsetsChunk {
U32(Vec<u32>),
U64(Vec<u64>),
}
/// An open text document.
///
/// The struct carries several optional backends (`storage`, `rope`, `piece_table`)
/// plus line-index state and encoding metadata.
// NOTE(review): the methods that use these fields live in the submodules declared at
// the top of this file; the field notes below are inferred from names and types and
// should be confirmed there.
#[derive(Debug)]
pub struct Document {
// Path of the backing file, if any.
path: Option<PathBuf>,
// File-backed storage, if any.
storage: Option<FileStorage>,
// Line start offsets, shared behind an `Arc<RwLock<..>>`.
line_offsets: Arc<RwLock<LineOffsets>>,
disk_index: Option<DiskLineIndex>,
// Indexing state shared through atomics.
indexing: Arc<AtomicBool>,
indexing_started: Option<Instant>,
file_len: usize,
indexed_bytes: Arc<AtomicUsize>,
avg_line_len: Arc<AtomicUsize>,
// Line-ending convention and encoding metadata for the document.
line_ending: LineEnding,
encoding: DocumentEncoding,
encoding_origin: DocumentEncodingOrigin,
decoding_had_errors: bool,
// Interior-mutable cache; the outer `Option` presumably means "not computed yet".
preserve_save_error_cache: Cell<Option<Option<DocumentEncodingErrorKind>>>,
// In-memory editing backends; each is optional.
rope: Option<Rope>,
piece_table: Option<PieceTable>,
dirty: bool,
}
/// Piece-table editing backend: an original file buffer, an append-only add buffer,
/// and a piece tree describing the current document as ranges of those two buffers.
#[derive(Debug)]
pub(crate) struct PieceTable {
// Bytes referenced by `PieceSource::Original` pieces.
original: FileStorage,
// Bytes referenced by `PieceSource::Add` pieces.
add: Vec<u8>,
pieces: PieceTree,
// Number of lines currently known; reported by `line_count()` (never below 1).
known_line_count: usize,
// Baselines used by `exact_line_count_with_fallback` to derive an exact line count
// from the change in the piece tree's line-break total.
exact_base_line_breaks: Option<usize>,
exact_base_total_lines: Option<usize>,
// Byte length covered by the known lines; line lookups fall back to it and
// `line_visible_segment` does not read past it.
known_byte_len: usize,
// Total document length in bytes.
total_len: usize,
// When true, `line_count()` is treated as exact.
full_index: bool,
encoding_origin: DocumentEncodingOrigin,
decoding_had_errors: bool,
// Session persistence throttling state (see `flush_session_inner`).
pending_session_flush: bool,
pending_session_edits: usize,
last_session_flush: Option<Instant>,
// Nesting depth of edit batches, and whether a flush was requested while batching.
edit_batch_depth: usize,
edit_batch_dirty: bool,
}
/// Result of an edit operation: whether the document changed and where the cursor
/// ends up as a `(line0, col0)` pair.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct EditOutcome {
    /// True when the edit modified the document.
    edited: bool,
    /// Cursor position after the edit, as `(line0, col0)`.
    cursor: (usize, usize),
}

impl EditOutcome {
    /// Bundles the edit flag with the resulting cursor position.
    const fn new(edited: bool, cursor: (usize, usize)) -> Self {
        EditOutcome { edited, cursor }
    }
}
impl PieceTable {
/// Creates a piece table over `original` with default encoding state: no explicit
/// total line count, the UTF-8 fast-path origin, and no decoding errors.
#[cfg_attr(not(test), allow(dead_code))]
fn new(original: FileStorage, line_lengths: Vec<usize>, full_index: bool) -> Self {
    let exact_base_total_lines = None;
    let encoding_origin = DocumentEncodingOrigin::Utf8FastPath;
    let decoding_had_errors = false;
    Self::new_with_encoding_state(
        original,
        line_lengths,
        full_index,
        exact_base_total_lines,
        encoding_origin,
        decoding_had_errors,
    )
}
/// Creates a piece table over `original` from the given per-line byte lengths.
///
/// An empty `line_lengths` is treated as a single line spanning the whole file. The
/// known byte length is the sum of the line lengths, capped at the file length. When
/// `exact_base_total_lines` is `None`, the known line count is used as the exact total
/// only if `full_index` is set.
fn new_with_encoding_state(
    original: FileStorage,
    mut line_lengths: Vec<usize>,
    full_index: bool,
    exact_base_total_lines: Option<usize>,
    encoding_origin: DocumentEncodingOrigin,
    decoding_had_errors: bool,
) -> Self {
    let total_len = original.len();
    if line_lengths.is_empty() {
        line_lengths.push(total_len);
    }
    let known_line_count = line_lengths.len().max(1);
    let known_byte_len = line_lengths.iter().sum::<usize>().min(total_len);
    let pieces =
        Self::build_initial_piece_tree(&original, total_len, &line_lengths, known_byte_len);
    let exact_base_total_lines = match exact_base_total_lines {
        Some(total) => Some(total),
        None if full_index => Some(known_line_count),
        None => None,
    };
    Self {
        original,
        add: Vec::new(),
        pieces,
        known_line_count,
        exact_base_line_breaks: Some(known_line_count.saturating_sub(1)),
        exact_base_total_lines,
        known_byte_len,
        total_len,
        full_index,
        encoding_origin,
        decoding_had_errors,
        pending_session_flush: false,
        pending_session_edits: 0,
        last_session_flush: None,
        edit_batch_depth: 0,
        edit_batch_dirty: false,
    }
}
/// Rebuilds a piece table from recovered session parts.
///
/// The total length and line count come from the recovered piece tree. The exact line
/// baselines are set only when the session metadata says the index was complete. A
/// missing encoding origin falls back to the UTF-8 fast path.
fn from_recovered_session(
    original: FileStorage,
    add: Vec<u8>,
    pieces: PieceTree,
    meta: SessionMeta,
) -> Self {
    let total_len = pieces.total_len();
    let known_line_count = pieces.total_line_breaks().saturating_add(1).max(1);
    let full_index = meta.full_index;
    let (exact_base_line_breaks, exact_base_total_lines) = if full_index {
        (
            Some(known_line_count.saturating_sub(1)),
            Some(known_line_count),
        )
    } else {
        (None, None)
    };
    let encoding_origin = meta
        .encoding_origin
        .unwrap_or(DocumentEncodingOrigin::Utf8FastPath);
    Self {
        original,
        add,
        pieces,
        known_line_count,
        exact_base_line_breaks,
        exact_base_total_lines,
        known_byte_len: meta.known_byte_len.min(total_len),
        total_len,
        full_index,
        encoding_origin,
        decoding_had_errors: meta.decoding_had_errors,
        pending_session_flush: false,
        pending_session_edits: 0,
        last_session_flush: None,
        edit_batch_depth: 0,
        edit_batch_dirty: false,
    }
}
/// Number of lines currently known to the piece table; never less than 1.
pub(crate) fn line_count(&self) -> usize {
    std::cmp::max(self.known_line_count, 1)
}
/// Total length of the document in bytes.
pub(crate) fn total_len(&self) -> usize {
self.total_len
}
/// Whether the table was built with the `full_index` flag set, in which case
/// `line_count()` is treated as exact by `exact_line_count_with_fallback`.
pub(crate) fn full_index(&self) -> bool {
self.full_index
}
/// Returns the exact line count when it can be determined.
///
/// With a full index this is simply `line_count()`. Otherwise the count is derived from
/// a baseline total (the stored one, or `fallback_total_lines`) adjusted by how many
/// line breaks the piece tree has gained or lost since the baseline was taken.
///
/// Returns `None` when no break baseline or no total baseline is available. The result
/// is never less than 1.
pub(crate) fn exact_line_count_with_fallback(
    &self,
    fallback_total_lines: Option<usize>,
) -> Option<usize> {
    if self.full_index {
        return Some(self.line_count().max(1));
    }
    let base_breaks = self.exact_base_line_breaks?;
    let base_total = self.exact_base_total_lines.or(fallback_total_lines)?.max(1);
    let current_breaks = self.pieces.total_line_breaks();
    let adjusted = match current_breaks.checked_sub(base_breaks) {
        Some(gained) => base_total.saturating_add(gained),
        None => base_total.saturating_sub(base_breaks - current_breaks),
    };
    Some(adjusted.max(1))
}
/// Returns the piece tree's fragmentation statistics (delegates to
/// `PieceTree::fragmentation_stats`).
pub(crate) fn fragmentation_stats(&self) -> FragmentationStats {
self.pieces.fragmentation_stats()
}
/// Returns the piece tree's fragmentation statistics computed with the given
/// small-piece threshold in bytes (delegates to
/// `PieceTree::fragmentation_stats_with_threshold`).
pub(crate) fn fragmentation_stats_with_threshold(
&self,
small_piece_threshold_bytes: usize,
) -> FragmentationStats {
self.pieces
.fragmentation_stats_with_threshold(small_piece_threshold_bytes)
}
/// Snapshots the metadata persisted alongside the session: the known byte length, the
/// full-index flag, and the encoding origin and decode-error state.
fn session_meta(&self) -> SessionMeta {
    let encoding_origin = Some(self.encoding_origin);
    SessionMeta {
        known_byte_len: self.known_byte_len,
        full_index: self.full_index,
        encoding_origin,
        decoding_had_errors: self.decoding_had_errors,
    }
}
/// Forces a session flush now, bypassing the pending/debounce checks.
///
/// # Errors
/// Returns the I/O error from the underlying flush (see `flush_session_inner`).
fn flush_session(&mut self) -> io::Result<()> {
self.flush_session_inner(true)
}
/// Records that an edit needs persisting and attempts a throttled flush.
///
/// Inside an edit batch the flush is deferred: the batch is marked dirty and the
/// flush attempt happens when the outermost batch ends.
fn schedule_session_flush(&mut self) -> io::Result<()> {
    self.pending_session_edits = self.pending_session_edits.saturating_add(1);
    self.pending_session_flush = true;
    if self.edit_batch_depth == 0 {
        return self.flush_session_inner(false);
    }
    self.edit_batch_dirty = true;
    Ok(())
}
/// Writes the session to persistence, subject to throttling unless `force` is set.
///
/// A non-forced call does nothing when no flush is pending. It also does nothing when
/// the last flush is younger than `PIECE_SESSION_FLUSH_DEBOUNCE` and fewer than
/// `PIECE_SESSION_FORCE_AFTER_EDITS` edits are pending.
///
/// On failure the pending state is cleared and persistence is detached from the piece
/// tree. The error is returned only for forced flushes; non-forced flushes report
/// `Ok(())` so that editing carries on without persistence.
fn flush_session_inner(&mut self, force: bool) -> io::Result<()> {
    if !force {
        if !self.pending_session_flush {
            return Ok(());
        }
        let within_debounce = self
            .last_session_flush
            .is_some_and(|flushed_at| flushed_at.elapsed() < PIECE_SESSION_FLUSH_DEBOUNCE);
        if within_debounce && self.pending_session_edits < PIECE_SESSION_FORCE_AFTER_EDITS {
            return Ok(());
        }
    }
    let flushed = self.pieces.flush_session(&self.add, self.session_meta());
    // Success or failure, nothing stays pending after an attempt.
    self.pending_session_flush = false;
    self.pending_session_edits = 0;
    match flushed {
        Ok(()) => {
            self.last_session_flush = Some(Instant::now());
            Ok(())
        }
        Err(err) => {
            self.last_session_flush = None;
            self.pieces.detach_persistence();
            if force {
                Err(err)
            } else {
                Ok(())
            }
        }
    }
}
/// Opens a (possibly nested) edit batch: notifies the piece tree and increments the
/// batch depth.
fn begin_edit_batch(&mut self) {
    self.pieces.begin_batch_edit();
    self.edit_batch_depth = self.edit_batch_depth.saturating_add(1);
}
/// Closes one level of edit batching.
///
/// Does nothing when no batch is open. When the outermost batch closes and a flush was
/// requested during the batch, a throttled session flush is attempted.
fn end_edit_batch(&mut self) -> io::Result<()> {
    let Some(remaining_depth) = self.edit_batch_depth.checked_sub(1) else {
        return Ok(());
    };
    self.edit_batch_depth = remaining_depth;
    self.pieces.end_batch_edit();
    if remaining_depth > 0 || !self.edit_batch_dirty {
        return Ok(());
    }
    self.edit_batch_dirty = false;
    self.flush_session_inner(false)
}
/// Whether the zero-based line index `line0` is within the known line count.
pub(crate) fn has_line(&self, line0: usize) -> bool {
    self.line_count() > line0
}
/// Counts the text columns on line `line0`, excluding its line break.
///
/// Walks the pieces covering the line's byte range and stops at the first `\n` or `\r`.
/// Returns 0 for an empty range.
pub(crate) fn line_len_chars(&self, line0: usize) -> usize {
    let (start, end) = self.line_range(line0);
    let mut columns = 0usize;
    if start < end {
        let mut hit_line_break = false;
        self.pieces
            .visit_range(start, end, |piece, local_start, local_end| {
                if hit_line_break {
                    return;
                }
                let source = self.source_bytes(piece.src);
                let limit = piece.start + local_end;
                let mut cursor = piece.start + local_start;
                while cursor < limit {
                    if matches!(source[cursor], b'\n' | b'\r') {
                        hit_line_break = true;
                        break;
                    }
                    cursor += utf8_step(source, cursor, limit);
                    columns += 1;
                }
            });
    }
    columns
}
/// Extracts up to `max_cols` columns of line `line0`, starting at column `start_col`.
///
/// Collection stops at the line's first `\n` or `\r`, at the known byte length, or once
/// `max_cols` columns have been gathered. Returns an empty string when `max_cols` is
/// zero, the line is out of range, or the requested start lies at or beyond the known
/// byte length. Invalid UTF-8 in the collected bytes is converted lossily.
pub(crate) fn line_visible_segment(
    &self,
    line0: usize,
    start_col: usize,
    max_cols: usize,
) -> String {
    if max_cols == 0 || line0 >= self.line_count() {
        return String::new();
    }
    let limit = self.known_byte_len;
    match self.line_start_byte(line0) {
        Some(line_start) if line_start < limit => {}
        _ => return String::new(),
    }
    let start = self.byte_offset_for_col(line0, start_col);
    if start >= limit {
        return String::new();
    }
    let mut collected: Vec<u8> = Vec::with_capacity(max_cols.min(4096).saturating_mul(4));
    let mut taken = 0usize;
    // The visitor returns `false` to stop the walk early.
    let _ = self
        .pieces
        .visit_range_while(start, limit, |piece, local_start, local_end| {
            if taken >= max_cols {
                return false;
            }
            let source = self.source_bytes(piece.src);
            let seg_end = piece.start + local_end;
            let mut cursor = piece.start + local_start;
            while cursor < seg_end {
                if matches!(source[cursor], b'\n' | b'\r') {
                    return false;
                }
                let next = cursor + utf8_step(source, cursor, seg_end);
                collected.extend_from_slice(&source[cursor..next]);
                taken += 1;
                if taken >= max_cols {
                    return false;
                }
                cursor = next;
            }
            true
        });
    match String::from_utf8(collected) {
        Ok(text) => text,
        Err(err) => String::from_utf8_lossy(&err.into_bytes()).into_owned(),
    }
}
/// Returns exactly `line_count` slices for the lines starting at `first_line0`, each
/// limited to `max_cols` columns from `start_col`.
///
/// Lines beyond the known line count are filled with empty slices. Every slice is
/// created with its flag set to `true`.
pub(crate) fn line_slices_exact(
    &self,
    first_line0: usize,
    line_count: usize,
    start_col: usize,
    max_cols: usize,
) -> Vec<LineSlice> {
    if line_count == 0 {
        return Vec::new();
    }
    let known_lines = self.line_count();
    if first_line0 >= known_lines {
        return vec![LineSlice::new(String::new(), true); line_count];
    }
    let available = known_lines.saturating_sub(first_line0).min(line_count);
    let mut slices: Vec<LineSlice> = Vec::with_capacity(line_count);
    slices.extend((0..available).map(|offset| {
        let line0 = first_line0.saturating_add(offset);
        LineSlice::new(self.line_visible_segment(line0, start_col, max_cols), true)
    }));
    // Pad with empty slices so the caller always gets `line_count` rows.
    slices.resize(line_count, LineSlice::new(String::new(), true));
    slices
}
/// Inserts `text` at `(line0, col0)`, normalizing its line breaks to `line_ending`.
///
/// If `col0` lies past the end of the line, the gap is filled with spaces so the text
/// lands at the requested column. Returns whether anything was inserted and the cursor
/// position after the inserted text.
///
/// # Errors
/// Propagates the I/O error from the underlying byte insertion.
pub(crate) fn insert_text_at(
    &mut self,
    line_ending: LineEnding,
    line0: usize,
    col0: usize,
    text: &str,
) -> io::Result<EditOutcome> {
    let line_cols = self.line_len_chars(line0);
    let padding_cols = col0.saturating_sub(line_cols);
    let insert_at = self.byte_offset_for_col(line0, col0.min(line_cols));
    let (normalized, added_lines, last_col) =
        normalize_insert_text(text, padding_cols, line_ending);
    let edited = !normalized.is_empty();
    if edited {
        self.insert_bytes(insert_at, normalized.as_bytes())?;
        // Inserting inside the known region grows it by the inserted length.
        if insert_at <= self.known_byte_len {
            self.known_byte_len = self.known_byte_len.saturating_add(normalized.len());
        }
        self.refresh_known_line_count();
    }
    let cursor = match added_lines {
        0 => (line0, col0.saturating_add(last_col)),
        lines => (line0.saturating_add(lines), last_col),
    };
    Ok(EditOutcome::new(edited, cursor))
}
/// Replaces `len_chars` text units starting at `(line0, col0)` with `text`.
///
/// A zero-length replacement is a plain insertion. The start column is clamped to the
/// line's length. If the existing bytes already equal the normalized replacement,
/// nothing is edited and the outcome reports `edited == false`. Otherwise the delete
/// and insert run inside one edit batch, which is always closed, even on error.
///
/// # Errors
/// Propagates I/O errors from the delete/insert first, then from closing the batch.
pub(crate) fn replace_range_at(
    &mut self,
    line_ending: LineEnding,
    line0: usize,
    col0: usize,
    len_chars: usize,
    text: &str,
) -> io::Result<EditOutcome> {
    if len_chars == 0 {
        return self.insert_text_at(line_ending, line0, col0, text);
    }
    let start_col0 = col0.min(self.line_len_chars(line0));
    let start = self.byte_offset_for_col(line0, start_col0);
    let end = self.advance_offset_by_text_units(start, len_chars);
    let has_range = end > start;
    let (normalized, added_lines, last_col) = normalize_insert_text(text, 0, line_ending);
    let existing = if has_range {
        self.read_range(start, end)
    } else {
        Vec::new()
    };
    if existing == normalized.as_bytes() {
        let cursor = match added_lines {
            0 => (line0, start_col0.saturating_add(last_col)),
            lines => (line0.saturating_add(lines), last_col),
        };
        return Ok(EditOutcome::new(false, cursor));
    }

    self.begin_edit_batch();
    let replaced = (|| -> io::Result<EditOutcome> {
        if has_range {
            self.delete_range(start, end - start)?;
        }
        self.insert_text_at(line_ending, line0, start_col0, text)
    })();
    // Close the batch before surfacing any error so the depth stays balanced.
    let batch_closed = self.end_edit_batch();
    let outcome = replaced?;
    batch_closed?;
    Ok(EditOutcome::new(true, outcome.cursor))
}
/// Performs a backspace at `(line0, col0)`.
///
/// Returns `(edited, line0, col0)` with the cursor position after the operation:
/// - In the middle of a line, the character before the cursor is deleted.
/// - Past the end of the line's text, only the cursor moves left; nothing is deleted.
/// - At column 0 of a line other than the first, the preceding line break is deleted
///   and the cursor moves to the end of the previous line.
/// - On an empty document or at the very start, nothing happens.
///
/// # Errors
/// Propagates I/O errors from the underlying range deletion.
pub(crate) fn backspace_at(
    &mut self,
    line0: usize,
    col0: usize,
) -> io::Result<(bool, usize, usize)> {
    if self.total_len == 0 {
        return Ok((false, line0, col0));
    }
    if col0 == 0 {
        if line0 == 0 {
            return Ok((false, line0, col0));
        }
        let line_start = self.line_range(line0).0;
        let newline_len = self.newline_len_before(line_start);
        if newline_len == 0 {
            return Ok((false, line0, col0));
        }
        self.delete_range(line_start.saturating_sub(newline_len), newline_len)?;
        let joined_line0 = line0.saturating_sub(1);
        let joined_col0 = self.line_len_chars(joined_line0);
        return Ok((true, joined_line0, joined_col0));
    }

    let prev_col0 = col0.saturating_sub(1);
    if col0 > self.line_len_chars(line0) {
        return Ok((false, line0, prev_col0));
    }
    let cur_byte = self.byte_offset_for_col(line0, col0);
    let prev_byte = self.byte_offset_for_col(line0, prev_col0);
    let width = cur_byte.saturating_sub(prev_byte);
    if width == 0 {
        return Ok((false, line0, col0));
    }
    self.delete_range(prev_byte, width)?;
    Ok((true, line0, prev_col0))
}
/// Resolves `char_index` to a position by scanning the document's bytes.
///
/// A `CursorScanState` seeded with `char_index` is fed every piece segment in
/// document order until it reports that it is done; its final `position()` is
/// returned. NOTE(review): the tuple is presumably `(line, column)` — confirm
/// against `CursorScanState::position`.
pub(crate) fn position_for_char_index(&self, char_index: usize) -> (usize, usize) {
    let mut scan = CursorScanState::new(char_index);
    if self.total_len > 0 && !scan.is_done() {
        self.pieces
            .visit_range(0, self.total_len, |piece, lo, hi| {
                // Later segments are ignored once the scan has finished.
                if !scan.is_done() {
                    let bytes = self.source_bytes(piece.src);
                    let from = piece.start + lo;
                    let to = piece.start + hi;
                    scan_cursor_position_bytes(&bytes[from..to], &mut scan);
                }
            });
    }
    scan.position()
}
/// Materializes the whole document as a `String`, replacing invalid UTF-8
/// sequences the way `String::from_utf8_lossy` does.
pub(crate) fn to_string_lossy(&self) -> String {
    let contents = self.read_range(0, self.total_len);
    String::from_utf8_lossy(&contents).into_owned()
}
fn source_bytes(&self, src: PieceSource) -> &[u8] {
match src {
PieceSource::Original => self.original.read_range(0, self.original.len()),
PieceSource::Add => &self.add,
}
}
/// Returns the byte range `(start, end)` of line `line0`, clamped to the
/// document length.
///
/// The range ends where the next line starts, so it includes the line's
/// trailing break bytes. Lines at or beyond `line_count()` yield an empty
/// range at the end of the document. When `line_start_byte` cannot supply an
/// offset, `known_byte_len` is used instead; the last line always ends at
/// `known_byte_len`.
fn line_range(&self, line0: usize) -> (usize, usize) {
    let total = self.total_len;
    let lines = self.line_count();
    if line0 >= lines {
        return (total, total);
    }

    let known = self.known_byte_len;
    let start = self.line_start_byte(line0).unwrap_or(known);
    let has_next_line = line0 + 1 < lines;
    let end = if has_next_line {
        self.line_start_byte(line0 + 1).unwrap_or(known)
    } else {
        known
    };

    // `end` is never allowed to fall before `start`.
    (start.min(total), end.max(start).min(total))
}
/// Copies the document bytes in `[start, end)` into a new vector.
///
/// `end` is clamped to the document length; an empty or out-of-range request
/// yields an empty vector.
fn read_range(&self, start: usize, end: usize) -> Vec<u8> {
    let stop = end.min(self.total_len);
    if start >= stop {
        return Vec::new();
    }
    let mut buf = Vec::with_capacity(stop - start);
    self.pieces.visit_range(start, stop, |piece, lo, hi| {
        let bytes = self.source_bytes(piece.src);
        buf.extend_from_slice(&bytes[piece.start + lo..piece.start + hi]);
    });
    buf
}
/// Returns the byte at document offset `offset`, or `None` when the offset is
/// at or past the end of the document.
fn byte_at(&self, offset: usize) -> Option<u8> {
    if offset >= self.total_len {
        return None;
    }
    let mut hit: Option<u8> = None;
    self.pieces
        .visit_range(offset, offset.saturating_add(1), |piece, lo, _hi| {
            // Only the first visited segment is consulted.
            if hit.is_none() {
                hit = self
                    .source_bytes(piece.src)
                    .get(piece.start + lo)
                    .copied();
            }
        });
    hit
}
/// Converts column `col0` on line `line0` into a document byte offset.
///
/// Walks the line one `utf8_step` at a time, counting each step as one column.
/// The walk stops at the first `\n` or `\r`, so a column past the end of the
/// line's text resolves to the offset of its line break. The result never
/// exceeds the end of the line's byte range.
fn byte_offset_for_col(&self, line0: usize, col0: usize) -> usize {
    let (line_start, line_end) = self.line_range(line0);
    if col0 == 0 || line_start >= line_end {
        return line_start;
    }

    let mut cols_seen = 0usize;
    let mut byte_pos = line_start;
    self.pieces
        .visit_range(line_start, line_end, |piece, lo, hi| {
            if cols_seen >= col0 {
                return;
            }
            let bytes = self.source_bytes(piece.src);
            let stop = piece.start + hi;
            let mut cursor = piece.start + lo;
            while cursor < stop && cols_seen < col0 {
                if matches!(bytes[cursor], b'\n' | b'\r') {
                    // Reached the line break: mark the walk as finished so
                    // remaining segments are skipped.
                    cols_seen = col0;
                    break;
                }
                let width = utf8_step(bytes, cursor, stop);
                cols_seen += 1;
                cursor += width;
                byte_pos += width;
            }
        });
    byte_pos.min(line_end)
}
/// Returns the byte offset reached by moving `text_units` units forward from
/// `start`.
///
/// One unit is a single `utf8_step`, a lone `\n`, or a `\r` together with a
/// directly following `\n` (the pair counts once, even when the two bytes sit
/// in different pieces). The result is clamped to the document length.
fn advance_offset_by_text_units(&self, start: usize, text_units: usize) -> usize {
    let origin = start.min(self.total_len);
    if text_units == 0 || origin >= self.total_len {
        return origin;
    }

    let mut units_left = text_units;
    let mut byte_pos = origin;
    // Set after consuming `\r`; the next byte is swallowed for free if it is `\n`.
    let mut after_cr = false;
    self.pieces
        .visit_range(origin, self.total_len, |piece, lo, hi| {
            if units_left == 0 && !after_cr {
                return;
            }
            let bytes = self.source_bytes(piece.src);
            let stop = piece.start + hi;
            let mut cursor = piece.start + lo;
            while cursor < stop && (units_left > 0 || after_cr) {
                let byte = bytes[cursor];
                if after_cr {
                    after_cr = false;
                    if byte == b'\n' {
                        cursor += 1;
                        byte_pos = byte_pos.saturating_add(1);
                        continue;
                    }
                }
                if units_left == 0 {
                    break;
                }
                let width = match byte {
                    b'\r' => {
                        after_cr = true;
                        1
                    }
                    b'\n' => 1,
                    _ => utf8_step(bytes, cursor, stop),
                };
                units_left -= 1;
                cursor += width;
                byte_pos = byte_pos.saturating_add(width);
            }
        });
    byte_pos.min(self.total_len)
}
/// Inserts `bytes` at document offset `pos` (clamped to the document length).
///
/// The payload is appended to the add buffer and a new `Add` piece covering it
/// is inserted into the piece tree; a piece straddling the insertion point is
/// split through `split_piece_with_sources`. Empty input is a no-op.
///
/// NOTE(review): unlike `delete_range`, this does not call
/// `refresh_known_line_count` — presumably callers handle it; confirm.
///
/// # Errors
///
/// Returns whatever `schedule_session_flush` reports.
fn insert_bytes(&mut self, pos: usize, bytes: &[u8]) -> io::Result<()> {
    if bytes.is_empty() {
        return Ok(());
    }

    // Record where the payload lands before growing the add buffer.
    let appended_at = self.add.len();
    self.add.extend_from_slice(bytes);
    let inserted = Piece {
        src: PieceSource::Add,
        start: appended_at,
        len: bytes.len(),
        line_breaks: count_line_breaks_in_bytes(bytes),
    };

    let at = pos.min(self.total_len);
    {
        let (original, add) = (&self.original, &self.add);
        let mut splitter = |piece: Piece, left_len: usize| {
            split_piece_with_sources(original, add, piece, left_len)
        };
        self.pieces.insert(at, inserted, &mut splitter);
    }

    self.total_len = self.pieces.total_len();
    if self.full_index {
        self.known_byte_len = self.total_len;
    }
    self.schedule_session_flush()
}
/// Removes `len` bytes starting at document offset `start`.
///
/// A zero-length or out-of-range request is a no-op. After the piece tree is
/// updated, `known_byte_len` follows the new length when the document is fully
/// indexed; otherwise it shrinks by the number of deleted bytes that lay inside
/// the previously known prefix. The known line count is refreshed afterwards.
///
/// # Errors
///
/// Returns whatever `schedule_session_flush` reports.
fn delete_range(&mut self, start: usize, len: usize) -> io::Result<()> {
    if len == 0 || start >= self.total_len {
        return Ok(());
    }

    let end = start.saturating_add(len).min(self.total_len);
    let known = self.known_byte_len;
    // Portion of the deleted span that falls inside the known prefix.
    let removed_known = end.min(known).saturating_sub(start.min(known));

    {
        let (original, add) = (&self.original, &self.add);
        let mut trimmer = |piece: Piece, local_start: usize, local_end: usize| {
            trim_piece_with_sources(original, add, piece, local_start, local_end)
        };
        self.pieces.delete_range(start, len, &mut trimmer);
    }

    self.total_len = self.pieces.total_len();
    self.known_byte_len = if self.full_index {
        self.total_len
    } else {
        self.known_byte_len.saturating_sub(removed_known)
    };
    self.refresh_known_line_count();
    self.schedule_session_flush()
}
/// Returns the byte length of the line break that ends immediately before
/// `line_start`: 2 for `\r\n`, 1 for a lone `\n` or `\r`, and 0 when there is
/// no preceding byte or it is not a line-break byte.
fn newline_len_before(&self, line_start: usize) -> usize {
    if line_start == 0 {
        return 0;
    }
    match self.byte_at(line_start - 1) {
        Some(b'\n') => {
            let preceded_by_cr =
                line_start >= 2 && self.byte_at(line_start - 2) == Some(b'\r');
            if preceded_by_cr {
                2
            } else {
                1
            }
        }
        Some(b'\r') => 1,
        _ => 0,
    }
}
/// Builds the initial piece tree over the original storage.
///
/// `line_lengths` gives the byte length of each known line; every line except
/// the last is treated as ending in a line break. Consecutive lines are grouped
/// into one piece until the group reaches `PIECE_TREE_TARGET_BYTES` bytes or
/// `PIECE_TREE_TARGET_LINES` lines. A break-terminated line longer than
/// `PIECE_TREE_TARGET_BYTES` becomes two pieces of its own: a body without
/// breaks and a tail of `PIECE_TREE_TARGET_BYTES` bytes that carries the break.
/// Bytes past `known_byte_len` are appended as a single piece with no recorded
/// breaks. For inputs of at least `PIECE_TREE_DISK_MIN_BYTES`,
/// `PieceTree::from_pieces_disk` is tried first and `PieceTree::from_pieces`
/// is the fallback.
fn build_initial_piece_tree(
    original: &FileStorage,
    total_len: usize,
    line_lengths: &[usize],
    known_byte_len: usize,
) -> PieceTree {
    /// Lines collected for the piece currently being assembled.
    #[derive(Default)]
    struct Pending {
        len: usize,
        breaks: usize,
        lines: usize,
    }

    /// Creates a piece that refers to the original storage.
    fn original_piece(start: usize, len: usize, line_breaks: usize) -> Piece {
        Piece {
            src: PieceSource::Original,
            start,
            len,
            line_breaks,
        }
    }

    /// Emits the pending group as one piece; a group without bytes is left untouched.
    fn flush(out: &mut Vec<Piece>, cursor: &mut usize, pending: &mut Pending) {
        if pending.len == 0 {
            return;
        }
        out.push(original_piece(*cursor, pending.len, pending.breaks));
        *cursor = cursor.saturating_add(pending.len);
        *pending = Pending::default();
    }

    if total_len == 0 {
        return PieceTree::new();
    }

    let mut pieces = Vec::new();
    let mut cursor = 0usize;
    let mut pending = Pending::default();
    let line_total = line_lengths.len().max(1);

    for (idx, &len) in line_lengths.iter().enumerate() {
        let ends_with_break = idx + 1 < line_total;

        if ends_with_break && len > PIECE_TREE_TARGET_BYTES {
            // Oversized line: close the current group, then give the line its
            // own body + tail pieces.
            flush(&mut pieces, &mut cursor, &mut pending);
            let tail_len = PIECE_TREE_TARGET_BYTES.min(len);
            let body_len = len.saturating_sub(tail_len);
            if body_len > 0 {
                pieces.push(original_piece(cursor, body_len, 0));
                cursor = cursor.saturating_add(body_len);
            }
            pieces.push(original_piece(cursor, tail_len, 1));
            cursor = cursor.saturating_add(tail_len);
            continue;
        }

        pending.len = pending.len.saturating_add(len);
        if ends_with_break {
            pending.breaks = pending.breaks.saturating_add(1);
        }
        pending.lines = pending.lines.saturating_add(1);

        let group_full = pending.len >= PIECE_TREE_TARGET_BYTES
            || pending.lines >= PIECE_TREE_TARGET_LINES;
        if group_full {
            flush(&mut pieces, &mut cursor, &mut pending);
        }
    }
    flush(&mut pieces, &mut cursor, &mut pending);

    if known_byte_len < total_len {
        // Region beyond the known prefix: one piece, no line breaks recorded.
        pieces.push(original_piece(
            known_byte_len,
            total_len - known_byte_len,
            0,
        ));
    }
    if pieces.is_empty() {
        pieces.push(original_piece(0, total_len, 0));
    }

    if total_len >= PIECE_TREE_DISK_MIN_BYTES {
        // The clone keeps `pieces` available for the fallback below.
        if let Ok(tree) = PieceTree::from_pieces_disk(original.path(), pieces.clone()) {
            return tree;
        }
    }
    PieceTree::from_pieces(pieces)
}
/// Reverts the most recent change recorded by the piece tree.
///
/// Returns `Ok(false)` when the piece tree has nothing to undo, `Ok(true)`
/// after the document length, known prefix and known line count have been
/// re-synchronized with the tree.
///
/// # Errors
///
/// Propagates any error returned by `schedule_session_flush`.
fn undo(&mut self) -> io::Result<bool> {
    if !self.pieces.undo() {
        return Ok(false);
    }
    self.total_len = self.pieces.total_len();
    // A fully indexed document keeps `known_byte_len` equal to `total_len`,
    // exactly as `insert_bytes` and `delete_range` maintain it. Clamping alone
    // would leave the known prefix too short whenever the undo makes the
    // document grow again (for example when a deletion is reverted).
    self.known_byte_len = if self.full_index {
        self.total_len
    } else {
        self.known_byte_len.min(self.total_len)
    };
    self.refresh_known_line_count();
    self.schedule_session_flush()?;
    Ok(true)
}
/// Re-applies the most recently undone change recorded by the piece tree.
///
/// Returns `Ok(false)` when the piece tree has nothing to redo, `Ok(true)`
/// after the document length, known prefix and known line count have been
/// re-synchronized with the tree.
///
/// # Errors
///
/// Propagates any error returned by `schedule_session_flush`.
fn redo(&mut self) -> io::Result<bool> {
    if !self.pieces.redo() {
        return Ok(false);
    }
    self.total_len = self.pieces.total_len();
    // A fully indexed document keeps `known_byte_len` equal to `total_len`,
    // exactly as `insert_bytes` and `delete_range` maintain it. Clamping alone
    // would leave the known prefix too short whenever the redo makes the
    // document grow again (for example when an insertion is re-applied).
    self.known_byte_len = if self.full_index {
        self.total_len
    } else {
        self.known_byte_len.min(self.total_len)
    };
    self.refresh_known_line_count();
    self.schedule_session_flush()?;
    Ok(true)
}
/// Recomputes `known_line_count` as the piece tree's total number of line
/// breaks plus one.
fn refresh_known_line_count(&mut self) {
// `saturating_add` avoids overflow at the numeric limit; the result is never below 1.
self.known_line_count = self.pieces.total_line_breaks().saturating_add(1).max(1);
}
/// Looks up the byte offset at which line `line0` starts.
///
/// The piece tree performs the search, using `local_offset_after_break` to
/// locate a break inside a piece. Offsets beyond `known_byte_len` are rejected
/// and reported as `None`.
fn line_start_byte(&self, line0: usize) -> Option<usize> {
    let offset = self
        .pieces
        .find_line_start(line0, |piece, local_break_idx| {
            self.local_offset_after_break(piece, local_break_idx)
        })?;
    (offset <= self.known_byte_len).then_some(offset)
}
/// Returns the offset, relative to the start of `piece`, of the byte that
/// follows the piece's `local_break_idx`-th line break (zero-based).
///
/// `\n`, a lone `\r`, and `\r\n` each count as one break; a `\r\n` pair is only
/// recognized when both bytes lie inside the piece. Returns `None` when the
/// piece holds fewer breaks than requested.
fn local_offset_after_break(&self, piece: Piece, local_break_idx: usize) -> Option<usize> {
    let bytes = self.source_bytes(piece.src);
    let start = piece.start.min(bytes.len());
    let end = piece.start.saturating_add(piece.len).min(bytes.len());

    let mut breaks_seen = 0usize;
    let mut i = start;
    while i < end {
        // Width in bytes of the break starting at `i`, if there is one.
        let width = match bytes[i] {
            b'\r' if i + 1 < end && bytes[i + 1] == b'\n' => 2,
            b'\r' | b'\n' => 1,
            _ => {
                i += 1;
                continue;
            }
        };
        if breaks_seen == local_break_idx {
            return Some(i + width - start);
        }
        breaks_seen += 1;
        i += width;
    }
    None
}
}
/// Returns the complete backing buffer for `src`: the whole of `original`
/// (read from offset 0 to its length) or the `add` slice.
fn piece_source_bytes<'a>(original: &'a FileStorage, add: &'a [u8], src: PieceSource) -> &'a [u8] {
match src {
PieceSource::Original => original.read_range(0, original.len()),
PieceSource::Add => add,
}
}
/// Counts the line breaks in the sub-range `[local_start, local_end)` of
/// `piece`, where both bounds are relative to the start of the piece.
///
/// The bounds are clamped to the backing buffer, and an inverted range is
/// treated as empty.
fn count_piece_line_breaks_with_sources(
    original: &FileStorage,
    add: &[u8],
    piece: Piece,
    local_start: usize,
    local_end: usize,
) -> usize {
    let bytes = piece_source_bytes(original, add, piece.src);
    let limit = bytes.len();
    let from = piece.start.saturating_add(local_start).min(limit);
    let to = piece.start.saturating_add(local_end).min(limit).max(from);
    count_line_breaks_in_bytes(&bytes[from..to])
}
/// Splits `piece` into a left part of about `left_len` bytes and the remaining
/// right part.
///
/// The split point is first passed through `align_utf8_boundary_backward`.
/// Either side is `None` when it would be empty; each returned piece carries a
/// freshly counted `line_breaks` value.
fn split_piece_with_sources(
    original: &FileStorage,
    add: &[u8],
    piece: Piece,
    left_len: usize,
) -> (Option<Piece>, Option<Piece>) {
    let bytes = piece_source_bytes(original, add, piece.src);
    let start = piece.start.min(bytes.len());
    let end = piece.start.saturating_add(piece.len).min(bytes.len());
    let split_at = align_utf8_boundary_backward(&bytes[start..end], left_len.min(piece.len));

    // Builds the piece covering `[local_from, local_to)` of `piece`, if non-empty.
    let sub_piece = |local_from: usize, local_to: usize| {
        let len = local_to.saturating_sub(local_from);
        (len > 0).then(|| Piece {
            src: piece.src,
            start: piece.start + local_from,
            len,
            line_breaks: count_piece_line_breaks_with_sources(
                original, add, piece, local_from, local_to,
            ),
        })
    };

    (sub_piece(0, split_at), sub_piece(split_at, piece.len))
}
/// Cuts the local range `[local_start, local_end)` out of `piece` and returns
/// what remains on each side.
///
/// The left cut point goes through `align_utf8_boundary_backward` and the right
/// one through `align_utf8_boundary_forward`. Either remainder is `None` when
/// it would be empty; each returned piece carries a freshly counted
/// `line_breaks` value.
fn trim_piece_with_sources(
    original: &FileStorage,
    add: &[u8],
    piece: Piece,
    local_start: usize,
    local_end: usize,
) -> (Option<Piece>, Option<Piece>) {
    let bytes = piece_source_bytes(original, add, piece.src);
    let start = piece.start.min(bytes.len());
    let end = piece.start.saturating_add(piece.len).min(bytes.len());
    let piece_bytes = &bytes[start..end];

    let keep_before = align_utf8_boundary_backward(piece_bytes, local_start.min(piece.len));
    let resume_at = align_utf8_boundary_forward(piece_bytes, local_end.min(piece.len));

    // Builds the piece covering `[local_from, local_to)` of `piece`, if non-empty.
    let sub_piece = |local_from: usize, local_to: usize| {
        let len = local_to.saturating_sub(local_from);
        (len > 0).then(|| Piece {
            src: piece.src,
            start: piece.start + local_from,
            len,
            line_breaks: count_piece_line_breaks_with_sources(
                original, add, piece, local_from, local_to,
            ),
        })
    };

    (sub_piece(0, keep_before), sub_piece(resume_at, piece.len))
}
/// Returns the `editlog_path` for `path`, or for `fallback` when `path` is `None`.
fn session_sidecar_path(path: Option<&Path>, fallback: &Path) -> PathBuf {
    editlog_path(path.unwrap_or(fallback))
}
/// Test-only helper that forwards to `persistence::clear_session_sidecar` for `path`.
#[cfg(test)]
fn clear_session_sidecar(path: &Path) {
persistence::clear_session_sidecar(path);
}
// Declares the `tests` submodule; it is compiled only when `cfg(test)` is active.
#[cfg(test)]
mod tests;