/// A human-facing position inside a text: line and column, both one-based.
///
/// This is the value returned by `LineIndex::line_col`, which counts columns
/// in `char`s from the start of the line.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct LineCol {
    /// One-based line number.
    pub line: usize,
    /// One-based column number.
    pub column: usize,
}
/// Precomputed table of line boundaries for one text buffer.
///
/// The index stores only offsets, not the text itself, so every query method
/// takes the text again as an argument.
#[derive(Clone, Debug)]
pub struct LineIndex {
    /// Byte offset at which each line begins. The first entry is always `0`
    /// and the entries are strictly increasing.
    line_starts: Vec<usize>,
    /// Length in bytes of the text the index was built from.
    len: usize,
}
impl LineIndex {
    /// Builds the index by scanning `text` once for line terminators.
    ///
    /// `\n`, `\r\n` and a lone `\r` each end a line; a `\r\n` pair counts as a
    /// single terminator. Line 0 always starts at byte 0, so the table is
    /// never empty, and a trailing terminator produces a final empty line.
    pub fn new(text: &str) -> Self {
        let bytes = text.as_bytes();
        let mut line_starts = vec![0];
        for (i, &byte) in bytes.iter().enumerate() {
            let ends_line = match byte {
                b'\n' => true,
                // A `\r` directly followed by `\n` is the first half of a CRLF
                // pair; the new line start is recorded when the `\n` is seen.
                b'\r' => bytes.get(i + 1) != Some(&b'\n'),
                _ => false,
            };
            if ends_line {
                line_starts.push(i + 1);
            }
        }
        Self {
            line_starts,
            len: text.len(),
        }
    }

    /// Returns the zero-based index of the line that contains byte `offset`.
    fn line_of(&self, offset: usize) -> usize {
        // `line_starts` is strictly increasing and begins with 0, so at least
        // one entry is `<= offset` and the subtraction cannot underflow.
        self.line_starts.partition_point(|&start| start <= offset) - 1
    }

    /// Resolves `offset` to its zero-based line and the slice of `text` between
    /// that line's start and the offset.
    ///
    /// The offset is clamped to the end of the text and, if it falls inside a
    /// multi-byte character, moved back to the start of that character.
    fn locate<'t>(&self, text: &'t str, offset: usize) -> (usize, &'t str) {
        let mut offset = offset.min(self.len).min(text.len());
        // Slicing at a non-boundary panics, so snap down to the character
        // start. Offset 0 is always a boundary, so the loop terminates.
        while !text.is_char_boundary(offset) {
            offset -= 1;
        }
        let line = self.line_of(offset);
        (line, &text[self.line_starts[line]..offset])
    }

    /// Converts a byte offset into a one-based line and column.
    ///
    /// The column counts `char`s from the start of the line. Offsets past the
    /// end of the text are clamped to the end; offsets inside a multi-byte
    /// character are treated as pointing at the start of that character.
    ///
    /// `text` must be the string this index was built from.
    ///
    /// # Panics
    ///
    /// May panic if `text` is a different string than the one indexed, because
    /// the stored line starts are then not guaranteed to be valid slice bounds.
    pub fn line_col(&self, text: &str, offset: usize) -> LineCol {
        let (line, prefix) = self.locate(text, offset);
        LineCol {
            line: line + 1,
            column: prefix.chars().count() + 1,
        }
    }

    /// Converts a byte offset into a zero-based `(line, character)` pair where
    /// `character` is measured in UTF-16 code units from the start of the line.
    ///
    /// Offsets are clamped and snapped exactly as in [`LineIndex::line_col`].
    /// Both components saturate at `u32::MAX` instead of wrapping.
    ///
    /// `text` must be the string this index was built from.
    ///
    /// # Panics
    ///
    /// May panic if `text` is a different string than the one indexed.
    pub fn utf16_position(&self, text: &str, offset: usize) -> (u32, u32) {
        let (line, prefix) = self.locate(text, offset);
        let character: usize = prefix.chars().map(char::len_utf16).sum();
        // Saturate rather than truncate if a count does not fit in `u32`.
        let saturate = |n: usize| n.min(u32::MAX as usize) as u32;
        (saturate(line), saturate(character))
    }

    /// Converts a zero-based `(line, character)` pair, with `character` in
    /// UTF-16 code units, back into a byte offset.
    ///
    /// Out-of-range input is clamped rather than rejected:
    /// * a `line` past the last line yields the length of the indexed text;
    /// * a `character` past the end of the line yields the end of that line,
    ///   excluding its terminator as computed by `line_end_excluding_newline`;
    /// * a `character` that lands inside a surrogate pair yields the byte
    ///   offset just after that code point.
    ///
    /// `text` must be the string this index was built from.
    ///
    /// # Panics
    ///
    /// May panic if `text` is a different string than the one indexed.
    pub fn offset_at(&self, text: &str, line: u32, character: u32) -> usize {
        let line = line as usize;
        let Some(&start) = self.line_starts.get(line) else {
            return self.len;
        };
        // The last line has no successor, so it simply runs to the end.
        let line_end = match self.line_starts.get(line + 1) {
            Some(&next) => line_end_excluding_newline(text, start, next),
            None => self.len,
        };
        let mut units = 0u32;
        for (i, ch) in text[start..line_end].char_indices() {
            if units >= character {
                return start + i;
            }
            // `len_utf16` is 1 or 2; saturating keeps pathological line
            // lengths from overflowing the counter.
            units = units.saturating_add(ch.len_utf16() as u32);
        }
        line_end
    }
}
/// Returns the byte offset where the content of the line `start..next` ends,
/// i.e. `next` with any trailing line terminator removed.
///
/// `start` is the byte offset of the line's first character and `next` is the
/// byte offset of the following line's start. The terminators recognised are
/// the same ones `LineIndex::new` splits on: `\r\n`, `\n`, and a lone `\r`.
/// The result is never smaller than `start`, so a `\r` that belongs to the
/// previous line is never consumed.
///
/// If `start..next` is not a valid byte range of `text`, `next` is returned
/// unchanged.
fn line_end_excluding_newline(text: &str, start: usize, next: usize) -> usize {
    let Some(line) = text.as_bytes().get(start..next) else {
        return next;
    };
    // Check the two-byte terminator first so CRLF is removed as one unit.
    let terminator_len = match line {
        [.., b'\r', b'\n'] => 2,
        [.., b'\n' | b'\r'] => 1,
        _ => 0,
    };
    next - terminator_len
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the expected one-based position.
    fn at(line: usize, column: usize) -> LineCol {
        LineCol { line, column }
    }

    #[test]
    fn line_col_basic() {
        let src = "ab\ncde\n";
        let index = LineIndex::new(src);
        // (byte offset, expected position)
        let cases = [(0, at(1, 1)), (1, at(1, 2)), (3, at(2, 1)), (5, at(2, 3))];
        for (offset, expected) in cases {
            assert_eq!(index.line_col(src, offset), expected);
        }
    }

    #[test]
    fn utf16_counts_surrogates() {
        let src = "a𝕏b";
        let index = LineIndex::new(src);
        // Byte offset just past the astral character: 'a' is one UTF-16 unit,
        // '𝕏' is a surrogate pair.
        let after_pair = "a𝕏".len();
        assert_eq!(index.utf16_position(src, after_pair), (0, 3));
    }

    #[test]
    fn crlf_line_starts() {
        let src = "a\r\nb";
        let index = LineIndex::new(src);
        assert_eq!(index.line_col(src, 3), at(2, 1));
    }

    #[test]
    fn offset_at_round_trips_utf16_positions() {
        let src = "a𝕏b\ncd";
        let index = LineIndex::new(src);
        // Every char start plus the end of the text, i.e. every char boundary.
        let boundaries = src
            .char_indices()
            .map(|(start, _)| start)
            .chain(std::iter::once(src.len()));
        for offset in boundaries {
            let (line, character) = index.utf16_position(src, offset);
            let back = index.offset_at(src, line, character);
            assert_eq!(back, offset, "offset {offset}");
        }
    }

    #[test]
    fn offset_at_crlf_terminator() {
        let src = "ab\r\ncd";
        let index = LineIndex::new(src);
        // End of the first line stops before the CRLF pair.
        assert_eq!(index.offset_at(src, 0, 2), 2);
        // The second line begins right after it.
        assert_eq!(index.offset_at(src, 1, 0), 4);
    }

    #[test]
    fn offset_at_clamps_out_of_range() {
        let src = "ab\ncde\n";
        let index = LineIndex::new(src);
        // (line, character, expected byte offset)
        let cases = [(0, 99, 2), (2, 0, 7), (99, 0, src.len())];
        for (line, character, expected) in cases {
            assert_eq!(index.offset_at(src, line, character), expected);
        }
    }

    #[test]
    fn offset_at_inside_surrogate_pair_snaps_to_code_point_end() {
        let src = "𝕏";
        let index = LineIndex::new(src);
        assert_eq!(index.offset_at(src, 0, 1), src.len());
    }
}