use text_size::{TextRange, TextSize};
/// A zero-based line number paired with a column inside that line.
///
/// Positions produced by `LineIndex::line_col` measure `col` in UTF-8 bytes
/// from the start of the line.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct LineCol {
    /// Zero-based line number.
    pub line: u32,
    /// Zero-based column within the line, in UTF-8 bytes.
    pub col: u32,
}
/// A zero-based line number paired with a column measured in UTF-16 code units.
///
/// This is the "wide" counterpart of `LineCol`; `LineIndex::to_utf16` and
/// `LineIndex::from_utf16` convert between the two.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct WideLineCol {
    /// Zero-based line number.
    pub line: u32,
    /// Zero-based column within the line, in UTF-16 code units.
    pub col: u32,
}
/// Lookup table that maps byte offsets to line/column positions and converts
/// columns between UTF-8 bytes and UTF-16 code units.
#[derive(Clone, Debug)]
pub struct LineIndex {
    /// Byte offset at which each line starts; the first entry is always `0`.
    newlines: Vec<TextSize>,
    /// For every line (same indexing as `newlines`), the non-ASCII characters
    /// on that line in source order.
    wide_chars: Vec<Vec<WideChar>>,
}
/// A non-ASCII character recorded while indexing a line.
#[derive(Clone, Copy, Debug)]
struct WideChar {
    /// Byte offset of the character, relative to the start of its line.
    start: TextSize,
    /// Length of the character's UTF-8 encoding, in bytes.
    len: TextSize,
}
impl LineIndex {
    /// Builds the index for `text` in a single pass over its characters.
    ///
    /// A new line starts immediately after every `'\n'`. No other character
    /// (including `'\r'`) is treated as a line break. The index always contains
    /// at least one line, even for empty input.
    ///
    /// # Panics
    ///
    /// Panics if a character's byte offset does not fit in a [`TextSize`].
    #[must_use]
    pub fn new(text: &str) -> Self {
        let mut newlines = vec![TextSize::from(0)];
        let mut wide_chars: Vec<Vec<WideChar>> = vec![Vec::new()];
        let mut line_start = TextSize::from(0);

        for (offset, ch) in text.char_indices() {
            let offset = TextSize::try_from(offset).expect("source fits in u32 bytes");

            if ch == '\n' {
                // The next line begins right after the newline byte.
                let next_line_start = offset + TextSize::of('\n');
                newlines.push(next_line_start);
                wide_chars.push(Vec::new());
                line_start = next_line_start;
                continue;
            }

            // Only non-ASCII characters can differ in length between UTF-8 and
            // UTF-16, so they are the only ones that need to be remembered.
            if !ch.is_ascii() {
                wide_chars
                    .last_mut()
                    .expect("at least one line always exists")
                    .push(WideChar {
                        start: offset - line_start,
                        len: TextSize::of(ch),
                    });
            }
        }

        Self {
            newlines,
            wide_chars,
        }
    }

    /// Translates a byte `offset` into a zero-based line and a UTF-8 byte column.
    ///
    /// The offset is not checked against the length of the indexed text (the
    /// index does not store it); offsets past the end are attributed to the
    /// last line.
    #[must_use]
    pub fn line_col(&self, offset: TextSize) -> LineCol {
        // `newlines[0]` is 0, so at least one start is always `<= offset`; the
        // line is the last one whose start does not exceed the offset.
        let line = self
            .newlines
            .partition_point(|&start| start <= offset)
            .saturating_sub(1);
        let line_start = self.newlines[line];
        let col = offset - line_start;
        LineCol {
            line: u32::try_from(line).expect("line count fits in u32"),
            col: u32::from(col),
        }
    }

    /// Converts a UTF-8 byte column into a UTF-16 code-unit column.
    ///
    /// Lines outside the index, and lines without non-ASCII characters, are
    /// returned with the column unchanged. A column that points into the middle
    /// of a multi-byte character is snapped back to the start of that
    /// character instead of underflowing.
    #[must_use]
    pub fn to_utf16(&self, pos: LineCol) -> WideLineCol {
        let mut col = pos.col;
        for wc in self.wide_chars_on(pos.line) {
            let start = u32::from(wc.start);
            if start >= pos.col {
                // This and every later wide char lies at or after the position.
                break;
            }
            let utf8_len = u32::from(wc.len);
            let into_char = pos.col - start;
            if into_char < utf8_len {
                // `pos.col` is inside this character: report the character's
                // own column. `col` currently equals `pos.col` minus the
                // shrinkage of earlier characters, so this cannot underflow.
                col -= into_char;
                break;
            }
            // The character ends at or before `pos.col`; it occupies fewer
            // UTF-16 units than UTF-8 bytes.
            col -= utf8_len - Self::utf16_units(utf8_len);
        }
        WideLineCol {
            line: pos.line,
            col,
        }
    }

    /// Converts a UTF-16 code-unit column into a UTF-8 byte column.
    ///
    /// Lines outside the index, and lines without non-ASCII characters, are
    /// returned with the column unchanged. A column that falls between the two
    /// surrogates of a character is snapped back to the start of that
    /// character. Columns too large to represent saturate at `u32::MAX`
    /// instead of overflowing.
    #[must_use]
    pub fn from_utf16(&self, pos: WideLineCol) -> LineCol {
        // Bytes by which the UTF-8 column exceeds the UTF-16 column, summed over
        // the wide chars that end at or before `pos`.
        let mut extra: u32 = 0;
        for wc in self.wide_chars_on(pos.line) {
            let utf8_start = u32::from(wc.start);
            // UTF-16 column at which this character starts. `extra` never
            // exceeds the total byte length of the preceding characters, so it
            // is at most `utf8_start`.
            let utf16_start = utf8_start - extra;
            if utf16_start >= pos.col {
                break;
            }
            let utf8_len = u32::from(wc.len);
            let utf16_len = Self::utf16_units(utf8_len);
            if pos.col - utf16_start < utf16_len {
                // Between the surrogates of this character: there is no
                // matching UTF-8 boundary, so use the character's start.
                return LineCol {
                    line: pos.line,
                    col: utf8_start,
                };
            }
            extra += utf8_len - utf16_len;
        }
        LineCol {
            line: pos.line,
            col: pos.col.saturating_add(extra),
        }
    }

    /// Returns the byte range covered by `line`, or `None` if the line is not
    /// in the index.
    ///
    /// For every line but the last, the range ends at the start of the next
    /// line and therefore includes the terminating `'\n'`. The index does not
    /// store the length of the text, so the last line is open-ended: its range
    /// ends at `u32::MAX`.
    #[must_use]
    pub fn line_range(&self, line: u32) -> Option<TextRange> {
        let idx = line as usize;
        let start = *self.newlines.get(idx)?;
        let end = self
            .newlines
            .get(idx + 1)
            .copied()
            .unwrap_or(TextSize::from(u32::MAX));
        Some(TextRange::new(start, end))
    }

    /// Returns the number of lines in the index; this is always at least one.
    #[must_use]
    pub fn line_count(&self) -> u32 {
        u32::try_from(self.newlines.len()).expect("line count fits in u32")
    }

    /// Returns the recorded non-ASCII characters of `line`, or an empty slice
    /// when the line is outside the index.
    fn wide_chars_on(&self, line: u32) -> &[WideChar] {
        match self.wide_chars.get(line as usize) {
            Some(chars) => chars,
            None => &[],
        }
    }

    /// Number of UTF-16 code units used by a non-ASCII character whose UTF-8
    /// encoding is `utf8_len` bytes long: 4-byte characters need a surrogate
    /// pair, shorter ones a single unit.
    fn utf16_units(utf8_len: u32) -> u32 {
        if utf8_len == 4 {
            2
        } else {
            1
        }
    }
}
#[cfg(test)]
mod tests {
    use super::{LineCol, LineIndex, WideLineCol};
    use pretty_assertions::assert_eq;
    use text_size::{TextRange, TextSize};

    /// Empty input still yields a single line starting at offset 0.
    #[test]
    fn empty_input_is_one_line() {
        let index = LineIndex::new("");
        assert_eq!(index.line_count(), 1);
        let origin = LineCol { line: 0, col: 0 };
        assert_eq!(index.line_col(TextSize::from(0)), origin);
    }

    /// Without newlines every offset is on line 0 and the column is the offset.
    #[test]
    fn ascii_single_line_line_col() {
        let index = LineIndex::new("abc");
        assert_eq!(index.line_count(), 1);
        let expected = LineCol { line: 0, col: 2 };
        assert_eq!(index.line_col(TextSize::from(2)), expected);
    }

    /// Offsets map to the right line, including the newline byte itself and
    /// the first byte after it.
    #[test]
    fn ascii_multi_line_line_col() {
        let index = LineIndex::new("ab\ncde\nf");
        assert_eq!(index.line_count(), 3);

        // (byte offset, expected line, expected column)
        let cases: [(u32, u32, u32); 5] = [(0, 0, 0), (2, 0, 2), (3, 1, 0), (6, 1, 3), (7, 2, 0)];
        for &(offset, line, col) in &cases {
            assert_eq!(index.line_col(TextSize::from(offset)), LineCol { line, col });
        }
    }

    /// Columns count UTF-8 bytes, so a 2-byte character advances the column by 2.
    #[test]
    fn utf8_offset_is_bytes_not_chars() {
        let index = LineIndex::new("éllo");
        let after_e_acute = index.line_col(TextSize::from(2));
        assert_eq!(after_e_acute, LineCol { line: 0, col: 2 });
        let inside_e_acute = index.line_col(TextSize::from(1));
        assert_eq!(inside_e_acute, LineCol { line: 0, col: 1 });
    }

    /// A 2-byte BMP character is one UTF-16 unit; the conversion round-trips.
    #[test]
    fn utf16_round_trip_bmp() {
        let index = LineIndex::new("abé");
        let byte_pos = index.line_col(TextSize::from(4));
        assert_eq!(byte_pos, LineCol { line: 0, col: 4 });
        let wide_pos = index.to_utf16(byte_pos);
        assert_eq!(wide_pos, WideLineCol { line: 0, col: 3 });
        let round_tripped = index.from_utf16(wide_pos);
        assert_eq!(round_tripped, byte_pos);
    }

    /// A 4-byte astral character is two UTF-16 units; the conversion round-trips.
    #[test]
    fn utf16_round_trip_astral() {
        let index = LineIndex::new("a\u{1F600}b");
        let byte_pos = index.line_col(TextSize::from(5));
        assert_eq!(byte_pos, LineCol { line: 0, col: 5 });
        let wide_pos = index.to_utf16(byte_pos);
        assert_eq!(wide_pos, WideLineCol { line: 0, col: 3 });
        let round_tripped = index.from_utf16(wide_pos);
        assert_eq!(round_tripped, byte_pos);
    }

    /// Inner lines end where the next one starts; the last line ends at
    /// `u32::MAX`; lines past the end have no range.
    #[test]
    fn line_range_last_line_open_ended() {
        let index = LineIndex::new("ab\ncd");
        let first = TextRange::new(TextSize::from(0), TextSize::from(3));
        let last = TextRange::new(TextSize::from(3), TextSize::from(u32::MAX));
        assert_eq!(index.line_range(0), Some(first));
        assert_eq!(index.line_range(1), Some(last));
        assert_eq!(index.line_range(2), None);
    }
}