use lsp_types::Position;
/// A line/column location inside a text, as produced by [`LineIndex::byte_to_lc`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LineCol {
/// Line number, starting at 1 for the first line.
pub line: usize,
/// Column number, starting at 1 and counted in `char`s (Unicode scalar values)
/// from the start of the line.
pub column: usize,
}
/// Precomputed table of line starts over a borrowed text, used to convert between
/// byte offsets, 1-based line/column pairs and LSP `Position`s.
#[derive(Debug, Clone)]
pub struct LineIndex<'a> {
// The text being indexed; all byte offsets refer to this string.
text: &'a str,
// Byte offset at which each line begins, in ascending order. The first entry is
// always 0; every later entry is the offset just after a `\n` byte.
line_starts: Vec<usize>,
}
impl<'a> LineIndex<'a> {
    /// Builds an index over `text` by recording the byte offset at which each line starts.
    ///
    /// Lines are split on `\n` only; a preceding `\r` stays part of the line it ends.
    pub fn new(text: &'a str) -> Self {
        // Capacity is a rough guess (~40 bytes per line) to limit reallocation.
        let mut line_starts = Vec::with_capacity(text.len() / 40 + 1);
        // The first line always starts at offset 0, even for empty text.
        line_starts.push(0);
        // Every `\n` byte starts a new line at the byte right after it.
        line_starts.extend(
            text.bytes()
                .enumerate()
                .filter(|&(_, byte)| byte == b'\n')
                .map(|(offset, _)| offset + 1),
        );
        Self { text, line_starts }
    }

    /// Converts a byte offset into a 1-based line and 1-based column.
    ///
    /// The column is counted in `char`s from the start of the line. Offsets past the
    /// end of the text are clamped to the end; an offset that falls inside a
    /// multi-byte character is treated as the offset of that character's first byte.
    pub fn byte_to_lc(&self, offset: usize) -> LineCol {
        let (line_idx, prefix) = self.locate(offset);
        LineCol {
            line: line_idx + 1,
            column: prefix.chars().count() + 1,
        }
    }

    /// Converts a byte offset into an LSP `Position` (0-based line, 0-based
    /// character counted in UTF-16 code units).
    ///
    /// Offsets past the end of the text are clamped to the end; an offset that falls
    /// inside a multi-byte character is treated as the offset of that character's
    /// first byte.
    pub fn byte_to_position(&self, offset: usize) -> Position {
        let (line_idx, prefix) = self.locate(offset);
        // `Position` fields are `u32`; values beyond `u32::MAX` would be truncated
        // by these casts.
        let character = prefix.encode_utf16().count() as u32;
        Position::new(line_idx as u32, character)
    }

    /// Converts an LSP `Position` back into a byte offset into the text.
    ///
    /// * A `line` beyond the last line resolves to `text.len()`.
    /// * A `character` that points into the middle of a surrogate pair resolves to
    ///   the byte offset just after that character.
    /// * A `character` beyond the column of the line's `\n` resolves to the start of
    ///   the following line (or `text.len()` on the last line).
    ///
    /// NOTE(review): the last rule lands past the line terminator rather than before
    /// it; the existing tests pin this behavior, so it is kept — confirm it is what
    /// callers expect.
    pub fn position_to_byte(&self, position: Position) -> usize {
        let line = position.line as usize;
        let Some(&line_start) = self.line_starts.get(line) else {
            return self.text.len();
        };
        // The line's slice includes its trailing `\n`, if it has one.
        let line_end = self
            .line_starts
            .get(line + 1)
            .copied()
            .unwrap_or(self.text.len());
        let line_text = &self.text[line_start..line_end];

        // Walk the line, accumulating UTF-16 code units until the requested column
        // is reached or passed.
        let mut utf16 = 0u32;
        for (byte_off, ch) in line_text.char_indices() {
            if utf16 >= position.character {
                return line_start + byte_off;
            }
            utf16 += ch.len_utf16() as u32;
        }
        line_end
    }

    /// Returns the number of lines in the text.
    ///
    /// This is always at least 1; text ending in `\n` counts the empty line after it.
    pub fn line_count(&self) -> usize {
        self.line_starts.len()
    }

    /// Resolves `offset` to its 0-based line index and the text between the start of
    /// that line and the offset.
    ///
    /// The offset is clamped to the text length and then moved back to the nearest
    /// character boundary, because slicing a `str` at a non-boundary panics.
    fn locate(&self, offset: usize) -> (usize, &str) {
        let mut end = offset.min(self.text.len());
        // Offset 0 is always a boundary, so this loop terminates.
        while !self.text.is_char_boundary(end) {
            end -= 1;
        }
        let line_idx = self.line_index_for(end);
        let line_start = self.line_starts[line_idx];
        (line_idx, &self.text[line_start..end])
    }

    /// Returns the 0-based index of the line containing byte `offset`, i.e. the last
    /// line whose start is `<= offset`.
    fn line_index_for(&self, offset: usize) -> usize {
        // `line_starts` is ascending and begins with 0, so at least one entry
        // satisfies the predicate and the partition point is >= 1.
        self.line_starts
            .partition_point(|&start| start <= offset)
            .saturating_sub(1)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the expected 1-based line/column pair.
    fn lc(line: usize, column: usize) -> LineCol {
        LineCol { line, column }
    }

    /// Empty text still has one line, and offset 0 sits at its start.
    #[test]
    fn empty_string() {
        let index = LineIndex::new("");
        assert_eq!(index.byte_to_lc(0), lc(1, 1));
        assert_eq!(index.byte_to_position(0), Position::new(0, 0));
    }

    /// Columns advance one per ASCII byte, including the end-of-text offset.
    #[test]
    fn single_line() {
        let index = LineIndex::new("abc");
        for (offset, column) in [(0, 1), (2, 3), (3, 4)] {
            assert_eq!(index.byte_to_lc(offset).column, column);
        }
        assert_eq!(index.byte_to_position(2), Position::new(0, 2));
    }

    /// A newline byte belongs to the line it ends; the next byte starts a new line.
    #[test]
    fn multi_line() {
        let index = LineIndex::new("ab\ncd\nef");
        let expectations = [
            (0, lc(1, 1)),
            (2, lc(1, 3)),
            (3, lc(2, 1)),
            (6, lc(3, 1)),
        ];
        for (offset, expected) in expectations {
            assert_eq!(index.byte_to_lc(offset), expected);
        }
        assert_eq!(index.byte_to_position(6), Position::new(2, 0));
    }

    /// A two-byte UTF-8 character counts as one column and one UTF-16 unit.
    #[test]
    fn utf8_multibyte() {
        let index = LineIndex::new("\u{00e1}b\nc");

        assert_eq!(index.byte_to_lc(2), lc(1, 2));
        assert_eq!(index.byte_to_position(2), Position::new(0, 1));

        assert_eq!(index.byte_to_lc(3), lc(1, 3));
        assert_eq!(index.byte_to_position(3), Position::new(0, 2));
    }

    /// A character outside the BMP is one column but two UTF-16 code units.
    #[test]
    fn utf16_surrogate_pair() {
        let index = LineIndex::new("\u{1F600}x");
        assert_eq!(index.byte_to_lc(4), lc(1, 2));
        assert_eq!(index.byte_to_position(4), Position::new(0, 2));
    }

    /// Offsets beyond the text are treated as the end of the text.
    #[test]
    fn offset_past_end_clamps() {
        let index = LineIndex::new("abc");
        assert_eq!(index.byte_to_lc(100), lc(1, 4));
    }

    /// The offset just after a trailing newline is the start of a new, empty line.
    #[test]
    fn trailing_newline() {
        let index = LineIndex::new("ab\n");
        assert_eq!(index.byte_to_lc(3), lc(2, 1));
    }

    /// Every character-boundary offset survives a trip through `Position` and back.
    #[test]
    fn position_to_byte_round_trips() {
        let text = "ab\ncde\nf";
        let index = LineIndex::new(text);
        for offset in (0..=text.len()).filter(|&at| text.is_char_boundary(at)) {
            let pos = index.byte_to_position(offset);
            assert_eq!(index.position_to_byte(pos), offset, "offset {offset}");
        }
    }

    /// UTF-16 columns map onto byte offsets; out-of-range columns and lines clamp.
    #[test]
    fn position_to_byte_handles_utf16_and_overshoot() {
        let index = LineIndex::new("\u{1F600}x\ny");
        let to_byte = |line, character| index.position_to_byte(Position::new(line, character));

        assert_eq!(to_byte(0, 0), 0);
        assert_eq!(to_byte(0, 2), 4);
        assert_eq!(to_byte(1, 0), 6);
        assert_eq!(to_byte(0, 99), 6);
        assert_eq!(to_byte(9, 0), 7);
    }
}