use crate::lsp;
/// Table of line-start byte offsets used to convert between byte offsets
/// and `lsp::Position` values for a given source text.
///
/// The table does not retain the text it was built from; `position` and
/// `offset` take the source as an argument on every call.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LineIndex {
/// Byte offset at which each line begins, in ascending order. Both `new`
/// and `Default` seed this with `0`, the start of line 0.
line_starts: Vec<usize>,
}
impl Default for LineIndex {
    /// Index with a single line that starts at byte offset 0.
    fn default() -> Self {
        let line_starts = vec![0];
        Self { line_starts }
    }
}
impl LineIndex {
    /// Scans `source` once and records the byte offset at which every line
    /// begins.
    ///
    /// `\n`, `\r\n` and a lone `\r` each end a line; a `\r\n` pair counts as a
    /// single terminator. Offset `0` is always recorded as the start of line 0.
    #[must_use]
    pub fn new(source: &str) -> Self {
        let bytes = source.as_bytes();
        let starts_after_breaks = bytes.iter().enumerate().filter_map(|(idx, &byte)| {
            let terminates = match byte {
                b'\n' => true,
                // The `\r` of a `\r\n` pair defers to the `\n` that follows it,
                // so the pair yields exactly one line start.
                b'\r' => bytes.get(idx + 1) != Some(&b'\n'),
                _ => false,
            };
            terminates.then_some(idx + 1)
        });
        Self {
            line_starts: std::iter::once(0).chain(starts_after_breaks).collect(),
        }
    }

    /// Converts a byte `offset` into `source` to a line/column position, with
    /// the column counted in UTF-16 code units.
    ///
    /// * An offset past the end is clamped to `source.len()`.
    /// * An offset inside a multi-byte character is moved back to the start of
    ///   that character.
    /// * An offset between the `\r` and `\n` of a `\r\n` pair is reported as
    ///   column 0 of the following line.
    ///
    /// NOTE: the recorded line starts are used to slice `source` directly, so
    /// this assumes the index was built from the same text.
    #[allow(
        clippy::cast_possible_truncation,
        reason = "line/column values in markdown files won't exceed u32::MAX"
    )]
    #[must_use]
    pub fn position(&self, source: &str, offset: usize) -> lsp::Position {
        let clamped = offset.min(source.len());
        // Index 0 is always a char boundary, so the search cannot come up empty.
        let offset = (0..=clamped)
            .rev()
            .find(|&idx| source.is_char_boundary(idx))
            .unwrap_or(0);

        let splits_crlf = offset > 0 && source.as_bytes()[offset - 1..].starts_with(b"\r\n");
        if splits_crlf {
            // `offset` itself is never a line start here, so counting the starts
            // before it gives the index of the line that follows the pair.
            let line = self.line_starts.partition_point(|&start| start < offset) as u32;
            return lsp::Position { line, character: 0 };
        }

        // Last line whose start is at or before `offset`.
        let line = self.line_starts.partition_point(|&start| start <= offset) - 1;
        let line_text = &source[self.line_starts[line]..offset];
        lsp::Position {
            line: line as u32,
            character: line_text.encode_utf16().count() as u32,
        }
    }

    /// Converts a line/column position (column in UTF-16 code units) back to a
    /// byte offset into `source`.
    ///
    /// * A line beyond the last recorded line maps to `source.len()`.
    /// * A column beyond the end of the line stops at the line terminator (or
    ///   at the end of `source` on the final line).
    /// * A column that lands inside a surrogate pair maps to the start of that
    ///   character.
    ///
    /// NOTE: the recorded line start is used to slice `source` directly, so
    /// this assumes the index was built from the same text.
    #[must_use]
    pub fn offset(&self, source: &str, pos: lsp::Position) -> usize {
        let Some(&line_start) = self.line_starts.get(pos.line as usize) else {
            return source.len();
        };

        let tail = &source[line_start..];
        let mut units_left = pos.character as usize;
        for (idx, ch) in tail.char_indices() {
            let units = ch.len_utf16();
            // Stop at the line terminator, or when the next character no longer
            // fits into the remaining column budget.
            if matches!(ch, '\n' | '\r') || units_left < units {
                return line_start + idx;
            }
            units_left -= units;
        }
        line_start + tail.len()
    }
}
#[cfg(test)]
#[allow(
    clippy::expect_used,
    reason = "tests use expect for clarity per project standards"
)]
mod tests {
    use super::LineIndex;
    use crate::invariants::assert_line_index_agrees;
    use crate::lsp;

    /// Builds an index for `source` and hands both to the shared agreement
    /// assertion.
    fn check(source: &str) {
        let index = LineIndex::new(source);
        assert_line_index_agrees(source, &index);
    }

    /// Runs the agreement assertion over CRLF, bare-CR, multi-byte, empty and
    /// trailing-newline inputs.
    #[test]
    fn agrees_on_crlf_bare_cr_and_multibyte() {
        let samples = [
            "ab\r\ncd\r\nef",
            "ab\rcd\ref",
            "aé b\nx",
            "# café 😀 header\r\nsecond λ line\n",
            "",
            "no trailing newline",
            "trailing\n",
            "\r\n\r\n",
        ];
        for sample in samples {
            check(sample);
        }
    }

    /// An offset between `\r` and `\n` is reported as the start of the next line.
    #[test]
    fn crlf_interior_matches_scalar_quirk() {
        let text = "a\r\nb";
        let found = LineIndex::new(text).position(text, 2);
        assert_eq!(
            (found.line, found.character),
            (1, 0),
            "CRLF-interior offset is line 1 column 0, matching the scalar conversion"
        );
    }

    /// An out-of-range byte offset converts to a position that maps back to
    /// the end of the text.
    #[test]
    fn position_past_eof_clamps_to_end() {
        let text = "abc\n";
        let index = LineIndex::new(text);
        let round_trip = index.offset(text, index.position(text, 999));
        assert_eq!(
            round_trip,
            text.len(),
            "an offset past EOF clamps to source length and round-trips"
        );
    }

    /// A line number with no recorded start maps to the end of the text.
    #[test]
    fn offset_past_last_line_is_source_len() {
        let text = "one\ntwo";
        let beyond = lsp::Position {
            line: 99,
            character: 0,
        };
        let found = LineIndex::new(text).offset(text, beyond);
        assert_eq!(
            found,
            text.len(),
            "a line past the end of input maps to source length"
        );
    }
}