use tower_lsp::lsp_types::Position;
/// Translates between UTF-8 byte offsets in a source text and LSP
/// [`Position`]s, whose `character` field is counted in UTF-16 code units.
///
/// Only `\n` starts a new line. In a `\r\n` pair the `\r` is part of the
/// terminator of the line it ends.
#[derive(Debug, Clone)]
pub struct LineIndex {
    /// Byte offset at which each line begins. The first entry is always `0`
    /// and every later entry is the byte just past a `\n`.
    line_starts: Vec<usize>,
    /// Owned copy of the indexed text, needed to count UTF-16 code units.
    source: String,
}

impl LineIndex {
    /// Builds the index by recording the byte offset that follows every `\n`.
    pub fn new(source: &str) -> Self {
        let mut line_starts = vec![0];
        line_starts.extend(source.match_indices('\n').map(|(newline, _)| newline + 1));
        LineIndex {
            line_starts,
            source: source.to_string(),
        }
    }

    /// Converts a byte offset into a zero-based line / UTF-16 column pair.
    ///
    /// Offsets past the end of the text are clamped to the end. An offset
    /// that falls inside a multi-byte character is moved back to the start
    /// of that character instead of panicking.
    pub fn position(&self, byte_offset: usize) -> Position {
        let byte_offset = self.clamp_to_char_boundary(byte_offset);
        // Index of the last line whose start is <= byte_offset. The first
        // entry is 0, so the partition point is always at least 1.
        let line = self
            .line_starts
            .partition_point(|&start| start <= byte_offset)
            .saturating_sub(1);
        let line_start = self.line_starts[line];
        let units = self.source[line_start..byte_offset].encode_utf16().count();
        Position {
            line: Self::saturating_u32(line),
            character: Self::saturating_u32(units),
        }
    }

    /// Returns the byte offset at which `line` starts, or the length of the
    /// text when `line` is past the last line.
    pub fn line_start_offset(&self, line: usize) -> usize {
        self.line_starts
            .get(line)
            .copied()
            .unwrap_or(self.source.len())
    }

    /// Counts the UTF-16 code units in the byte range `byte_start..byte_end`.
    ///
    /// Both bounds are clamped to the text and moved back to the nearest
    /// character boundary, matching [`LineIndex::position`]. An empty or
    /// inverted range yields `0`.
    pub fn utf16_length(&self, byte_start: usize, byte_end: usize) -> u32 {
        let start = self.clamp_to_char_boundary(byte_start);
        let end = self.clamp_to_char_boundary(byte_end);
        if start >= end {
            return 0;
        }
        Self::saturating_u32(self.source[start..end].encode_utf16().count())
    }

    /// Converts a line / UTF-16 column pair back into a byte offset.
    ///
    /// A line past the end of the text maps to the length of the text. A
    /// `character` beyond the end of its line maps to the end of that line,
    /// before its `\n` or `\r\n` terminator, so the result never spills onto
    /// the following line. A `character` that points into the middle of a
    /// surrogate pair resolves to the offset just past that character.
    pub fn offset(&self, position: Position) -> usize {
        let line = position.line as usize;
        let line_start = match self.line_starts.get(line) {
            Some(&start) => start,
            None => return self.source.len(),
        };
        let next_start = self.line_start_offset(line + 1);
        let with_terminator = &self.source[line_start..next_start];
        // Exclude the terminator from the scan: columns address the line's
        // content only.
        let content = match with_terminator.strip_suffix('\n') {
            Some(rest) => rest.strip_suffix('\r').unwrap_or(rest),
            None => with_terminator,
        };

        let target = position.character as usize;
        let mut units = 0usize;
        for (byte_idx, ch) in content.char_indices() {
            if units >= target {
                return line_start + byte_idx;
            }
            units += ch.len_utf16();
        }
        line_start + content.len()
    }

    /// Clamps `byte_offset` to the text and moves it back to the start of the
    /// character it falls in, so it is always safe to slice at.
    fn clamp_to_char_boundary(&self, byte_offset: usize) -> usize {
        let mut offset = byte_offset.min(self.source.len());
        // Offset 0 is always a boundary, so this cannot underflow.
        while !self.source.is_char_boundary(offset) {
            offset -= 1;
        }
        offset
    }

    /// Narrows a count to `u32`, saturating rather than silently wrapping.
    fn saturating_u32(value: usize) -> u32 {
        value.min(u32::MAX as usize) as u32
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the expected [`Position`] in assertions.
    fn pos(line: u32, character: u32) -> Position {
        Position { line, character }
    }

    #[test]
    fn single_line() {
        let idx = LineIndex::new("hello world");
        assert_eq!(idx.position(0), pos(0, 0));
        assert_eq!(idx.position(5), pos(0, 5));
        assert_eq!(idx.position(11), pos(0, 11));
    }

    #[test]
    fn multi_line() {
        let idx = LineIndex::new("abc\ndef\nghi");
        assert_eq!(idx.position(0), pos(0, 0));
        assert_eq!(idx.position(3), pos(0, 3));
        assert_eq!(idx.position(4), pos(1, 0));
        assert_eq!(idx.position(7), pos(1, 3));
        assert_eq!(idx.position(8), pos(2, 0));
    }

    #[test]
    fn roundtrip() {
        let src = "Let x be 5.\nSet x to 10.\nShow x.\n";
        let idx = LineIndex::new(src);
        for offset in 0..src.len() {
            let pos = idx.position(offset);
            let back = idx.offset(pos);
            assert_eq!(back, offset, "roundtrip failed at offset {offset}");
        }
    }

    #[test]
    fn offset_from_position() {
        let idx = LineIndex::new("abc\ndef\nghi");
        assert_eq!(idx.offset(pos(0, 0)), 0);
        assert_eq!(idx.offset(pos(1, 0)), 4);
        assert_eq!(idx.offset(pos(2, 2)), 10);
    }

    #[test]
    fn empty_source() {
        let idx = LineIndex::new("");
        assert_eq!(idx.position(0), pos(0, 0));
        assert_eq!(idx.offset(pos(0, 0)), 0);
    }

    #[test]
    fn out_of_bounds_offset() {
        let idx = LineIndex::new("abc");
        assert_eq!(idx.position(100), pos(0, 3));
    }

    #[test]
    fn out_of_bounds_position() {
        let idx = LineIndex::new("abc");
        assert_eq!(idx.offset(pos(5, 0)), 3);
    }

    #[test]
    fn line_start_offset_returns_correct_values() {
        let idx = LineIndex::new("abc\ndef\nghi");
        assert_eq!(idx.line_start_offset(0), 0);
        assert_eq!(idx.line_start_offset(1), 4);
        assert_eq!(idx.line_start_offset(2), 8);
    }

    #[test]
    fn line_start_offset_out_of_bounds() {
        let idx = LineIndex::new("abc\ndef");
        assert_eq!(idx.line_start_offset(99), 7);
    }

    #[test]
    fn windows_line_endings() {
        let src = "abc\r\ndef\r\nghi";
        let idx = LineIndex::new(src);
        let start_of_second_line = idx.position(5);
        assert_eq!(start_of_second_line, pos(1, 0));
        assert_eq!(idx.offset(start_of_second_line), 5);
    }

    #[test]
    fn multibyte_utf8_roundtrip() {
        // 'é' occupies bytes 3..5 but is a single UTF-16 code unit.
        let src = "café\nworld";
        let idx = LineIndex::new(src);
        let pos_e_accent = idx.position(3);
        assert_eq!(pos_e_accent.line, 0);
        assert_eq!(pos_e_accent.character, 3, "UTF-16 offset of 'é' should be 3");
        // The column just past 'é' is where byte and UTF-16 counts diverge.
        assert_eq!(idx.position(5), pos(0, 4));
        assert_eq!(idx.offset(pos(0, 4)), 5);
        let pos_world = idx.position(6);
        assert_eq!(pos_world, pos(1, 0));
        assert_eq!(idx.offset(pos_world), 6);
        // Every character boundary must survive a round trip.
        for (offset, _) in src.char_indices() {
            assert_eq!(
                idx.offset(idx.position(offset)),
                offset,
                "roundtrip failed at offset {offset}"
            );
        }
    }

    #[test]
    fn astral_char_counts_two_utf16_units() {
        // '😀' is four UTF-8 bytes (1..5) and a surrogate pair in UTF-16.
        let idx = LineIndex::new("a😀b");
        assert_eq!(idx.position(1), pos(0, 1));
        assert_eq!(idx.position(5), pos(0, 3));
        assert_eq!(idx.position(6), pos(0, 4));
        assert_eq!(idx.offset(pos(0, 1)), 1);
        assert_eq!(idx.offset(pos(0, 3)), 5);
        assert_eq!(idx.offset(pos(0, 4)), 6);
    }

    #[test]
    fn utf16_length_counts_code_units() {
        let idx = LineIndex::new("a😀b");
        assert_eq!(idx.utf16_length(0, 6), 4);
        assert_eq!(idx.utf16_length(1, 5), 2);
        // Empty and inverted ranges are zero-length.
        assert_eq!(idx.utf16_length(5, 5), 0);
        assert_eq!(idx.utf16_length(5, 1), 0);
        // The end bound is clamped to the text.
        assert_eq!(idx.utf16_length(0, 100), 4);
    }
}