use text_size::TextSize;
/// Line-start table for a piece of text, paired with an owned copy of that text.
///
/// The table supports converting between absolute byte offsets, `(line, column)`
/// pairs, and UTF-16 column positions.
#[derive(Debug, Clone)]
pub struct LineIndex {
    /// Byte offset at which each line begins, in ascending order.
    ///
    /// The first entry is always `0`; every further entry is the offset of the
    /// byte immediately following a `\n`.
    line_starts: Vec<u32>,
    /// Owned copy of the indexed text, consulted for UTF-8 <-> UTF-16 conversions.
    text: String,
}
impl LineIndex {
    /// Builds an index over `text`.
    ///
    /// Line 0 starts at byte offset 0 and every `\n` byte starts a new line at the
    /// byte that follows it. The text is copied into the index so that column
    /// conversions can inspect it later.
    pub fn new(text: &str) -> Self {
        // Offsets are stored as `u32`; for text longer than `u32::MAX` bytes the
        // cast below truncates, so such input is not supported.
        let starts_after_newlines = text
            .bytes()
            .enumerate()
            .filter(|&(_, byte)| byte == b'\n')
            .map(|(idx, _)| (idx + 1) as u32);

        let mut line_starts = vec![0];
        line_starts.extend(starts_after_newlines);

        Self {
            line_starts,
            text: text.to_string(),
        }
    }

    /// Converts an absolute byte offset into a zero-based `(line, column)` pair.
    ///
    /// The column is measured in bytes from the start of the line. Offsets past
    /// the end of the text are attributed to the last line.
    pub fn line_col(&self, byte_offset: u32) -> (u32, u32) {
        // `line_starts` is sorted and begins with 0, so the partition point is at
        // least 1 and the entry just before it is the line containing the offset.
        let line = self
            .line_starts
            .partition_point(|&start| start <= byte_offset)
            .saturating_sub(1);
        let col = byte_offset - self.line_starts[line];
        (line as u32, col)
    }

    /// Converts a byte column on `line` into the equivalent UTF-16 column.
    ///
    /// The end position is clamped to the length of the text, and a column that
    /// falls inside a multi-byte character is rounded down to the start of that
    /// character instead of panicking. An out-of-range `line` is measured from
    /// the start of the text.
    pub fn utf8_to_utf16_col(&self, line: u32, utf8_col: u32) -> u32 {
        let line_start = self.line_starts.get(line as usize).copied().unwrap_or(0) as usize;
        // Line starts are 0 or follow an ASCII `\n`, so `line_start` is always a
        // char boundary and the floored end can never fall below it.
        let col_end = self.floor_to_char_boundary(line_start.saturating_add(utf8_col as usize));
        self.text[line_start..col_end].encode_utf16().count() as u32
    }

    /// Converts a UTF-16 column on `line` into the equivalent byte column.
    ///
    /// Returns `None` when `line` does not exist. A column past the end of the
    /// line resolves to the line's length (excluding its trailing `\n`), and a
    /// column that falls between the two code units of a surrogate pair resolves
    /// to the end of that character.
    pub fn utf16_to_utf8_col(&self, line: u32, utf16_col: u32) -> Option<u32> {
        let line_start = self.line_starts.get(line as usize).copied()? as usize;
        // The next line starts one past this line's `\n`; step back over it.
        let line_end = self
            .line_starts
            .get(line as usize + 1)
            .map(|&next_start| (next_start as usize).saturating_sub(1))
            .unwrap_or(self.text.len());

        let mut utf16_seen = 0u32;
        let mut utf8_seen = 0usize;
        for ch in self.text[line_start..line_end].chars() {
            if utf16_seen >= utf16_col {
                break;
            }
            utf16_seen += ch.len_utf16() as u32;
            utf8_seen += ch.len_utf8();
        }
        Some(utf8_seen as u32)
    }

    /// Returns the byte offset at which `line` starts, or `None` if there is no such line.
    pub fn line_start(&self, line: u32) -> Option<TextSize> {
        self.line_starts
            .get(line as usize)
            .copied()
            .map(TextSize::from)
    }

    /// Converts a `(line, UTF-16 column)` position into an absolute byte offset.
    ///
    /// A `line` beyond the last line resolves to the length of the text.
    pub fn offset(&self, line: u32, utf16_col: u32) -> u32 {
        match self.line_starts.get(line as usize) {
            Some(&line_start) => {
                line_start + self.utf16_to_utf8_col(line, utf16_col).unwrap_or(0)
            }
            None => self.text.len() as u32,
        }
    }

    /// Counts the UTF-16 code units in the byte range `byte_start..byte_end`.
    ///
    /// Both bounds are clamped to the text length and rounded down to the nearest
    /// char boundary, so the result is the difference between the UTF-16
    /// positions of the two bounds. An empty or reversed range yields `0`.
    pub fn utf16_len(&self, byte_start: usize, byte_end: usize) -> u32 {
        let start = self.floor_to_char_boundary(byte_start);
        let end = self.floor_to_char_boundary(byte_end);
        // `str::get` returns `None` for a reversed range rather than panicking.
        self.text
            .get(start..end)
            .map_or(0, |span| span.encode_utf16().count() as u32)
    }

    /// Returns the byte range covered by `line`, including its trailing `\n` if any.
    ///
    /// A `line` beyond the last line yields an empty range at the end of the text.
    pub fn line_range(&self, line: u32) -> std::ops::Range<u32> {
        let text_len = self.text.len() as u32;
        let start_or_text_end =
            |idx: usize| self.line_starts.get(idx).copied().unwrap_or(text_len);
        let line = line as usize;
        start_or_text_end(line)..start_or_text_end(line + 1)
    }

    /// Number of lines in the text; always at least 1, even for empty text.
    pub fn line_count(&self) -> u32 {
        self.line_starts.len() as u32
    }

    /// Number of lines, as a `usize` (same quantity as [`LineIndex::line_count`]).
    ///
    /// Note that this is never 0, whereas [`LineIndex::is_empty`] inspects the
    /// text: an index over `""` has `len() == 1` and `is_empty() == true`.
    pub fn len(&self) -> usize {
        self.line_starts.len()
    }

    /// Returns `true` when the indexed text contains no bytes.
    pub fn is_empty(&self) -> bool {
        self.text.is_empty()
    }

    /// Length of the indexed text in bytes.
    pub fn text_len(&self) -> TextSize {
        TextSize::from(self.text.len() as u32)
    }

    /// Clamps `idx` to the text length, then rounds it down to a char boundary.
    fn floor_to_char_boundary(&self, idx: usize) -> usize {
        let mut idx = idx.min(self.text.len());
        // Index 0 is always a boundary, so this loop terminates.
        while !self.text.is_char_boundary(idx) {
            idx -= 1;
        }
        idx
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Three ASCII lines shared by the line-oriented tests.
    const THREE_LINES: &str = "line1\nline2\nline3";

    /// Byte offsets resolve to the right (line, column) across newlines.
    #[test]
    fn test_line_index_ascii() {
        let idx = LineIndex::new(THREE_LINES);
        let cases = [
            (0, (0, 0)),
            (5, (0, 5)),
            (6, (1, 0)),
            (11, (1, 5)),
            (12, (2, 0)),
        ];
        for (byte_offset, expected) in cases {
            assert_eq!(idx.line_col(byte_offset), expected);
        }
    }

    /// (line, column) positions resolve to absolute byte offsets in ASCII text.
    #[test]
    fn test_offset_conversion_ascii() {
        let idx = LineIndex::new(THREE_LINES);
        let cases = [((0, 0), 0), ((1, 0), 6), ((2, 0), 12), ((2, 5), 17)];
        for ((line, col), expected) in cases {
            assert_eq!(idx.offset(line, col), expected);
        }
    }

    /// "é" occupies two UTF-8 bytes but a single UTF-16 code unit.
    #[test]
    fn test_utf8_multibyte() {
        let idx = LineIndex::new("Café");

        for byte_offset in [0, 1, 2, 3, 5] {
            assert_eq!(idx.line_col(byte_offset), (0, byte_offset));
        }
        for (utf8_col, utf16_col) in [(0, 0), (3, 3), (5, 4)] {
            assert_eq!(idx.utf8_to_utf16_col(0, utf8_col), utf16_col);
        }
    }

    /// The emoji occupies four UTF-8 bytes and two UTF-16 code units.
    #[test]
    fn test_utf8_emoji() {
        let idx = LineIndex::new("A🍳B");

        for byte_offset in [0, 1, 5] {
            assert_eq!(idx.line_col(byte_offset), (0, byte_offset));
        }
        for (utf8_col, utf16_col) in [(0, 0), (1, 1), (5, 3)] {
            assert_eq!(idx.utf8_to_utf16_col(0, utf8_col), utf16_col);
        }
    }

    /// UTF-16 lengths of byte ranges covering ASCII, a 2-byte and a 4-byte character.
    #[test]
    fn test_utf16_len() {
        let idx = LineIndex::new("Café🍳");
        for ((from, to), expected) in [((0, 3), 3), ((3, 5), 1), ((5, 9), 2)] {
            assert_eq!(idx.utf16_len(from, to), expected);
        }
    }

    /// UTF-16 columns resolve to byte offsets, skipping both bytes of "é".
    #[test]
    fn test_offset_from_utf16() {
        let idx = LineIndex::new("Café");
        for (utf16_col, expected) in [(0, 0), (1, 1), (2, 2), (3, 3), (4, 5)] {
            assert_eq!(idx.offset(0, utf16_col), expected);
        }
    }

    /// UTF-16 columns convert to byte columns on an existing line.
    #[test]
    fn test_utf16_to_utf8_col() {
        let idx = LineIndex::new("Café");
        for (utf16_col, utf8_col) in [(0, 0), (3, 3), (4, 5)] {
            assert_eq!(idx.utf16_to_utf8_col(0, utf16_col), Some(utf8_col));
        }
    }

    /// Empty text still has exactly one (empty) line.
    #[test]
    fn test_empty_text() {
        let idx = LineIndex::new("");
        assert_eq!(idx.line_col(0), (0, 0));
        assert_eq!(idx.line_count(), 1);
    }

    /// Each CJK character is three UTF-8 bytes and one UTF-16 code unit.
    #[test]
    fn test_chinese_characters() {
        let idx = LineIndex::new("中文");

        for byte_offset in [0, 3, 6] {
            assert_eq!(idx.line_col(byte_offset), (0, byte_offset));
        }
        for (utf8_col, utf16_col) in [(0, 0), (3, 1), (6, 2)] {
            assert_eq!(idx.utf8_to_utf16_col(0, utf8_col), utf16_col);
        }
    }

    /// Line starts are reported for existing lines and `None` past the last one.
    #[test]
    fn test_line_start() {
        let idx = LineIndex::new(THREE_LINES);
        for (line, start) in [(0, 0), (1, 6), (2, 12)] {
            assert_eq!(idx.line_start(line), Some(TextSize::from(start)));
        }
        assert_eq!(idx.line_start(3), None);
    }
}