use ropey::Rope;
/// Reports whether `b` has the bit pattern `10xx_xxxx`, i.e. lies in
/// `0x80..=0xBF` — the form UTF-8 uses for every byte of a multi-byte
/// sequence except the first.
#[inline]
fn is_utf8_continuation(b: u8) -> bool {
    matches!(b, 0x80..=0xBF)
}
/// Byte offsets of every line start in a document, kept so that byte offsets
/// and `(line, UTF-16 column)` positions can be converted into each other.
///
/// Only the offsets are stored. Every conversion method takes the text (or
/// rope) again and indexes it with those offsets, so it must be the same
/// content the cache was built from; slicing with offsets from different
/// content can panic.
#[derive(Debug, Clone)]
pub struct LineStartsCache {
    /// Byte offset at which each line begins, in ascending order; entry 0 is always 0.
    line_starts: Vec<usize>,
}

impl LineStartsCache {
    /// Clamps `offset` to `text.len()`, then steps back to the closest char boundary.
    fn normalize_text_offset(text: &str, offset: usize) -> usize {
        let mut normalized = offset.min(text.len());
        while normalized > 0 && !text.is_char_boundary(normalized) {
            normalized -= 1;
        }
        normalized
    }

    /// Clamps `offset` to the rope's byte length, then steps back while it
    /// points at a UTF-8 continuation byte.
    fn normalize_rope_offset(rope: &Rope, offset: usize) -> usize {
        let len = rope.len_bytes();
        let mut normalized = offset.min(len);
        // `normalized == len` is one past the last byte: there is nothing to
        // read there, so the `< len` guard keeps `rope.byte` in range.
        while normalized > 0 && normalized < len && is_utf8_continuation(rope.byte(normalized)) {
            normalized -= 1;
        }
        normalized
    }

    /// Index of the line containing `offset`: the last line start that is `<= offset`.
    fn line_containing(&self, offset: usize) -> usize {
        match self.line_starts.binary_search(&offset) {
            Ok(line) => line,
            // `insert_at` is the first line starting after `offset`; the
            // line before it is the one that contains `offset`.
            Err(insert_at) => insert_at.saturating_sub(1),
        }
    }

    /// Moves `next_start` back over the trailing `\n` / `\r` bytes of the line
    /// beginning at `line_start`, returning the end of the line's content.
    ///
    /// The scan may reach `line_start` itself, so a line consisting only of
    /// its terminator yields `line_start` (empty content).
    fn trim_line_terminator(
        line_start: usize,
        next_start: usize,
        byte_at: impl Fn(usize) -> u8,
    ) -> usize {
        let mut end = next_start;
        while end > line_start && matches!(byte_at(end - 1), b'\n' | b'\r') {
            end -= 1;
        }
        end
    }

    /// Number of UTF-16 code units needed to encode `chars`.
    fn utf16_len(chars: impl Iterator<Item = char>) -> usize {
        chars.map(char::len_utf16).sum()
    }

    /// Number of UTF-8 bytes occupied by the leading chars of `chars` that
    /// cover `character` UTF-16 code units.
    ///
    /// Stops early when `chars` runs out. A `character` that falls between the
    /// two code units of a surrogate pair is rounded up past that char.
    fn utf8_len_of_utf16_prefix(chars: impl Iterator<Item = char>, character: u32) -> usize {
        let target = character as usize;
        let mut utf16_units = 0;
        let mut utf8_bytes = 0;
        for ch in chars {
            if utf16_units >= target {
                break;
            }
            utf16_units += ch.len_utf16();
            utf8_bytes += ch.len_utf8();
        }
        utf8_bytes
    }

    /// Builds the cache by scanning `text` for line breaks.
    ///
    /// `\n`, `\r\n` and a lone `\r` each end a line; the next line starts on
    /// the byte after the break.
    pub fn new(text: &str) -> Self {
        let bytes = text.as_bytes();
        let mut line_starts = vec![0];
        for (i, &byte) in bytes.iter().enumerate() {
            match byte {
                b'\n' => line_starts.push(i + 1),
                // A CR directly followed by LF is half of a CRLF pair; the LF
                // arm records that break, so only a lone CR is handled here.
                b'\r' if bytes.get(i + 1) != Some(&b'\n') => line_starts.push(i + 1),
                _ => {}
            }
        }
        Self { line_starts }
    }

    /// Builds the cache from the line starts reported by `rope`.
    ///
    /// Line segmentation is whatever `rope.len_lines()` / `rope.line_to_byte()`
    /// report. NOTE(review): that may recognise a different set of line breaks
    /// than [`LineStartsCache::new`] — confirm against the rope configuration.
    pub fn new_rope(rope: &Rope) -> Self {
        let mut line_starts = vec![0];
        line_starts.extend((1..rope.len_lines()).map(|line| rope.line_to_byte(line)));
        Self { line_starts }
    }

    /// Converts a byte `offset` in `text` to `(line, UTF-16 column)`.
    ///
    /// An offset past the end is clamped to `text.len()`, and one inside a
    /// multi-byte char is moved back to the start of that char. Both results
    /// are narrowed with `as u32`.
    pub fn offset_to_position(&self, text: &str, offset: usize) -> (u32, u32) {
        let offset = Self::normalize_text_offset(text, offset);
        let line = self.line_containing(offset);
        let line_start = self.line_starts[line];
        let column = Self::utf16_len(text[line_start..offset].chars());
        (line as u32, column as u32)
    }

    /// Converts `(line, character)` — `character` counted in UTF-16 code
    /// units — to a byte offset in `text`.
    ///
    /// * A `line` beyond the last cached line yields `text.len()`.
    /// * A `character` beyond the end of the line is clamped to the end of the
    ///   line's content, in front of its `\n` / `\r\n` / `\r` terminator. This
    ///   also holds for empty lines, which resolve to the line start.
    pub fn position_to_offset(&self, text: &str, line: u32, character: u32) -> usize {
        let line = line as usize;
        let line_start = match self.line_starts.get(line) {
            Some(&start) => start,
            None => return text.len(),
        };
        let content_end = match self.line_starts.get(line + 1) {
            Some(&next_start) => {
                let bytes = text.as_bytes();
                Self::trim_line_terminator(line_start, next_start, |i| bytes[i])
            }
            // The last line has no terminator to strip.
            None => text.len(),
        };
        line_start + Self::utf8_len_of_utf16_prefix(text[line_start..content_end].chars(), character)
    }

    /// Rope counterpart of [`LineStartsCache::offset_to_position`]: converts a
    /// byte `offset` in `rope` to `(line, UTF-16 column)`, with the same
    /// clamping of out-of-range and mid-char offsets.
    pub fn offset_to_position_rope(&self, rope: &Rope, offset: usize) -> (u32, u32) {
        let offset = Self::normalize_rope_offset(rope, offset);
        let line = self.line_containing(offset);
        let line_start = self.line_starts[line];
        let column = Self::utf16_len(rope.byte_slice(line_start..offset).chars());
        (line as u32, column as u32)
    }

    /// Rope counterpart of [`LineStartsCache::position_to_offset`].
    ///
    /// A `line` beyond the last cached line yields `rope.len_bytes()`. As in
    /// the text variant, a `character` beyond the end of the line is clamped
    /// in front of the line's trailing `\n` / `\r` bytes rather than running
    /// on into the next line.
    pub fn position_to_offset_rope(&self, rope: &Rope, line: u32, character: u32) -> usize {
        let line = line as usize;
        let line_start = match self.line_starts.get(line) {
            Some(&start) => start,
            None => return rope.len_bytes(),
        };
        let content_end = match self.line_starts.get(line + 1) {
            Some(&next_start) => {
                Self::trim_line_terminator(line_start, next_start, |i| rope.byte(i))
            }
            None => rope.len_bytes(),
        };
        let content = rope.byte_slice(line_start..content_end);
        line_start + Self::utf8_len_of_utf16_prefix(content.chars(), character)
    }
}
#[derive(Debug, Clone)]
pub struct LineIndex {
line_starts: Vec<usize>,
text: String,
}
impl LineIndex {
pub fn new(text: String) -> Self {
let mut line_starts = vec![0];
let bytes = text.as_bytes();
let mut i = 0;
while i < bytes.len() {
if bytes[i] == b'\n' {
line_starts.push(i + 1);
} else if bytes[i] == b'\r' {
if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
line_starts.push(i + 2);
i += 1;
} else {
line_starts.push(i + 1);
}
}
i += 1;
}
Self { line_starts, text }
}
pub fn offset_to_position(&self, offset: usize) -> (u32, u32) {
let offset = self.normalize_offset(offset);
let line = self.line_starts.binary_search(&offset).unwrap_or_else(|i| i.saturating_sub(1));
let line_start = self.line_starts[line];
let column = self.utf16_column(line, offset - line_start);
(line as u32, column as u32)
}
pub fn position_to_offset(&self, line: u32, character: u32) -> Option<usize> {
let line = line as usize;
if line >= self.line_starts.len() {
return None;
}
let line_start = self.line_starts[line];
let line_end = if line + 1 < self.line_starts.len() {
self.line_starts[line + 1]
} else {
self.text.len()
};
let line_text = &self.text[line_start..line_end];
let byte_offset = self.utf16_to_byte_offset(line_text, character as usize)?;
Some(line_start + byte_offset)
}
fn utf16_column(&self, line: usize, byte_offset: usize) -> usize {
let line_start = self.line_starts[line];
let target_byte = line_start + byte_offset;
if target_byte > self.text.len() {
return 0;
}
let line_text = &self.text[line_start..target_byte];
line_text.chars().map(|ch| ch.len_utf16()).sum()
}
fn utf16_to_byte_offset(&self, line_text: &str, utf16_offset: usize) -> Option<usize> {
let mut current_utf16 = 0;
for (byte_offset, ch) in line_text.char_indices() {
if current_utf16 == utf16_offset {
return Some(byte_offset);
}
current_utf16 += ch.len_utf16();
if current_utf16 > utf16_offset {
return None;
}
}
if current_utf16 == utf16_offset { Some(line_text.len()) } else { None }
}
fn normalize_offset(&self, offset: usize) -> usize {
let mut normalized = offset.min(self.text.len());
while normalized > 0 && !self.text.is_char_boundary(normalized) {
normalized -= 1;
}
normalized
}
pub fn range(&self, start: usize, end: usize) -> ((u32, u32), (u32, u32)) {
let start_pos = self.offset_to_position(start);
let end_pos = self.offset_to_position(end);
(start_pos, end_pos)
}
}