use crate::lsp::Position;
/// Lookup table that converts byte offsets in a source string into zero-based
/// LSP-style `(line, character)` pairs.
///
/// A line ends at `\n`, at `\r\n`, or at a `\r` that is not followed by `\n`;
/// these are the end-of-line sequences the Language Server Protocol defines.
/// Columns are measured in UTF-16 code units, so a character outside the Basic
/// Multilingual Plane advances the column by two.
///
/// Build the index once with [`LineIndex::new`] and reuse it for every lookup
/// against the same text.
#[derive(Debug, Clone)]
pub struct LineIndex<'a> {
    /// The text the index was built from (borrowed, not copied).
    source: &'a str,
    /// Byte offset at which each line begins, in ascending order. The first
    /// entry is always `0`, so the table is never empty.
    line_starts: Vec<usize>,
}

impl<'a> LineIndex<'a> {
    /// Scans `source` once and records the byte offset at which every line starts.
    pub fn new(source: &'a str) -> Self {
        let bytes = source.as_bytes();
        // Capacity is only a guess (about one line per 32 bytes) to limit
        // reallocation during the scan; the vector still grows if needed.
        let mut line_starts = Vec::with_capacity(bytes.len() / 32 + 1);
        line_starts.push(0);
        for (idx, &byte) in bytes.iter().enumerate() {
            let ends_line = match byte {
                b'\n' => true,
                // A `\r` directly before `\n` is the first half of a CRLF pair:
                // the `\n` terminates that line, so the `\r` must not start
                // another one. Any other `\r` is a terminator in its own right.
                b'\r' => bytes.get(idx + 1) != Some(&b'\n'),
                _ => false,
            };
            if ends_line {
                line_starts.push(idx + 1);
            }
        }
        Self {
            source,
            line_starts,
        }
    }

    /// Returns the zero-based `(line, column)` of the byte at `offset`.
    ///
    /// * `offset` is a byte offset into the source. Values past the end are
    ///   clamped to the source length.
    /// * The column is the number of UTF-16 code units between the start of
    ///   the line and `offset`.
    ///
    /// An offset that falls inside a multi-byte character is reported as if it
    /// pointed just past that character. The line number saturates at
    /// `u32::MAX` instead of wrapping.
    pub fn line_col(&self, offset: usize) -> (u32, u32) {
        let offset = offset.min(self.source.len());
        // `line_starts` is ascending and begins with 0, so at least one entry
        // is `<= offset`; the last such entry belongs to the containing line.
        let line = self.line_starts.partition_point(|&start| start <= offset) - 1;
        let line_start = self.line_starts[line];
        // Walk characters rather than slicing up to `offset`: `offset` is not
        // guaranteed to sit on a char boundary, and slicing there would panic.
        let col: u32 = self.source[line_start..]
            .char_indices()
            .take_while(|&(i, _)| line_start + i < offset)
            .map(|(_, ch)| ch.len_utf16() as u32)
            .sum();
        (u32::try_from(line).unwrap_or(u32::MAX), col)
    }

    /// Same lookup as [`LineIndex::line_col`], packaged as a [`Position`].
    pub fn position(&self, offset: usize) -> Position {
        let (line, character) = self.line_col(offset);
        Position { line, character }
    }
}
/// One-shot conversion of a byte `offset` in `source` to a zero-based
/// `(line, column)` pair, with the column counted in UTF-16 code units.
///
/// A fresh [`LineIndex`] is built on every call. When several offsets in the
/// same text need converting, build one [`LineIndex`] and query it directly.
pub fn offset_to_line_col(source: &str, offset: usize) -> (u32, u32) {
    let index = LineIndex::new(source);
    index.line_col(offset)
}
/// One-shot conversion of a byte `offset` in `source` to a [`Position`].
///
/// A fresh [`LineIndex`] is built on every call. When several offsets in the
/// same text need converting, build one [`LineIndex`] and call
/// [`LineIndex::position`] on it.
pub fn offset_to_position(source: &str, offset: usize) -> Position {
    let index = LineIndex::new(source);
    index.position(offset)
}
#[cfg(test)]
mod tests {
    use super::{LineIndex, offset_to_line_col};

    /// Plain ASCII text: both the line and the column are reported zero-based.
    #[test]
    fn zero_indexed_for_lsp() {
        let text = "one\ntwo";
        let cases: [(usize, (u32, u32)); 4] = [(0, (0, 0)), (3, (0, 3)), (4, (1, 0)), (6, (1, 2))];
        for (offset, expected) in cases {
            assert_eq!(offset_to_line_col(text, offset), expected);
        }
    }

    /// An offset beyond the end of the text resolves to the end of the text.
    #[test]
    fn clamps_offset_past_end() {
        let past_end = offset_to_line_col("ab", 99);
        assert_eq!(past_end, (0, 2));
    }

    /// A character outside the BMP is four UTF-8 bytes but two UTF-16 code units.
    #[test]
    fn counts_utf16_code_units_for_non_bmp_char() {
        let text = "😀x";
        let emoji_len = '😀'.len_utf8();
        assert_eq!(emoji_len, 4);
        let after_emoji = offset_to_line_col(text, emoji_len);
        let after_x = offset_to_line_col(text, emoji_len + 1);
        assert_eq!(after_emoji, (0, 2));
        assert_eq!(after_x, (0, 3));
    }

    /// The two-unit character is accounted for when it sits mid-line.
    #[test]
    fn non_bmp_char_after_text() {
        let text = "const icon = \"😀\"; missing";
        let target = text.find("missing").unwrap();
        assert_eq!(offset_to_line_col(text, target), (0, 19));
    }

    /// With CRLF endings the `\r` still counts as a column on the line it ends.
    #[test]
    fn crlf_line_endings() {
        let text = "ab\r\ncd";
        let cases: [(usize, (u32, u32)); 4] = [(2, (0, 2)), (3, (0, 3)), (4, (1, 0)), (5, (1, 1))];
        for (offset, expected) in cases {
            assert_eq!(offset_to_line_col(text, offset), expected);
        }
    }

    /// CRLF endings combined with two-unit characters on both lines.
    #[test]
    fn crlf_with_non_bmp_char() {
        let text = "😀\r\n😀";
        let emoji_len = '😀'.len_utf8();
        let second_line = emoji_len + 2;
        assert_eq!(offset_to_line_col(text, emoji_len), (0, 2));
        assert_eq!(offset_to_line_col(text, second_line), (1, 0));
        assert_eq!(offset_to_line_col(text, second_line + emoji_len), (1, 2));
    }

    /// A reused `LineIndex` and the one-shot helper agree at every byte offset.
    #[test]
    fn line_index_matches_single_shot() {
        let text = "alpha\nβγ😀δ\r\nlast line";
        let shared = LineIndex::new(text);
        (0..=text.len()).for_each(|offset| {
            let from_index = shared.line_col(offset);
            let from_helper = offset_to_line_col(text, offset);
            assert_eq!(from_index, from_helper, "mismatch at offset {offset}");
        });
    }
}