use crate::types::{Pos, Span};
#[derive(Clone, Debug)]
pub struct LineIndex {
line_starts: Vec<Pos>,
}
impl LineIndex {
#[must_use]
pub fn new(source: &[u8]) -> Self {
let mut line_starts = vec![0];
let mut i = 0;
while i < source.len() {
if source[i] == b'\n' {
line_starts.push(Pos::try_from(i + 1).unwrap_or(Pos::MAX));
} else if source[i] == b'\r' && i + 1 < source.len() && source[i + 1] == b'\n' {
line_starts.push(Pos::try_from(i + 2).unwrap_or(Pos::MAX));
i += 1;
}
i += 1;
}
Self { line_starts }
}
#[must_use]
#[inline]
pub fn line_count(&self) -> u32 {
u32::try_from(self.line_starts.len()).unwrap_or(u32::MAX)
}
#[must_use]
pub fn line_col(&self, offset: Pos) -> (u32, u32) {
let offset = offset as usize;
let offset_pos = Pos::try_from(offset).unwrap_or(Pos::MAX);
let line = match self.line_starts.binary_search_by(|&s| s.cmp(&offset_pos)) {
Ok(i) => i,
Err(i) => i.saturating_sub(1),
};
let line = line.min(self.line_starts.len().saturating_sub(1));
let line_start = self.line_starts[line] as usize;
let col = offset.saturating_sub(line_start);
(
u32::try_from(line).unwrap_or(u32::MAX),
u32::try_from(col).unwrap_or(u32::MAX),
)
}
#[must_use]
#[inline]
pub fn line_col_1based(&self, offset: Pos) -> (u32, u32) {
let (line, col) = self.line_col(offset);
(line + 1, col + 1)
}
#[must_use]
#[inline]
pub fn line_start(&self, line: u32) -> Pos {
self.line_starts
.get(line as usize)
.copied()
.unwrap_or_else(|| *self.line_starts.last().unwrap_or(&0))
}
#[must_use]
pub fn line_range(&self, line: u32, source_len: usize) -> Span {
let start = self.line_start(line);
let end = self
.line_starts
.get((line + 1) as usize)
.copied()
.unwrap_or_else(|| Pos::try_from(source_len).unwrap_or(Pos::MAX));
let source_len_pos = Pos::try_from(source_len).unwrap_or(Pos::MAX);
Span::new(start, end.min(source_len_pos))
}
#[must_use]
pub fn snippet_at(&self, source: &[u8], offset: Pos) -> String {
use core::fmt::Write as _;
let (line_0, col_0) = self.line_col(offset);
let line_1 = line_0 + 1;
let span = self.line_range(line_0, source.len());
let line_bytes = span.as_slice(source);
let line_str = String::from_utf8_lossy(line_bytes);
let trimmed = line_str.trim_end_matches(['\r', '\n']);
let col_limit = u32::try_from(trimmed.len()).unwrap_or(u32::MAX);
let caret_pos = col_0.min(col_limit) as usize;
let mut out = String::with_capacity(trimmed.len() + 32);
let _ = writeln!(out, " {line_1} | {trimmed}");
out.push_str(" | ");
out.extend(core::iter::repeat_n(' ', caret_pos));
out.push('^');
out
}
#[must_use]
pub fn snippet_with_location(&self, source: &[u8], offset: Pos) -> String {
let (line_1, col_1) = self.line_col_1based(offset);
let snip = self.snippet_at(source, offset);
format!("at {line_1}:{col_1}:\n{snip}")
}
#[cfg(feature = "utf16")]
#[must_use]
pub fn line_col_utf16(&self, source: &str, offset: Pos) -> (u32, u32) {
let (line_0, _) = self.line_col(offset);
let line_start = self.line_start(line_0) as usize;
let offset_usize = offset as usize;
if line_start >= source.len() {
return (line_0, 0);
}
let rest = &source[line_start..];
let mut utf16_col = 0u32;
for (i, c) in rest.char_indices() {
if line_start + i >= offset_usize {
break;
}
utf16_col += u32::try_from(c.len_utf16()).unwrap_or(0);
}
(line_0, utf16_col)
}
#[cfg(feature = "utf16")]
#[inline]
#[must_use]
pub fn line_col_utf16_1based(&self, source: &str, offset: Pos) -> (u32, u32) {
let (line_0, utf16_col) = self.line_col_utf16(source, offset);
(line_0 + 1, utf16_col + 1)
}
#[cfg(feature = "utf16")]
#[must_use]
pub fn line_col_utf16_to_byte(&self, source: &str, line: u32, character: u32) -> Option<Pos> {
let line_start = self.line_start(line) as usize;
let source_len = source.len();
let line_span = self.line_range(line, source_len);
let line_end = line_span.end as usize;
let line_src = source.get(line_start..line_end)?;
let mut utf16_col = 0u32;
for (i, c) in line_src.char_indices() {
if utf16_col == character {
return Pos::try_from(line_start + i).ok();
}
if utf16_col > character {
return None;
}
utf16_col += u32::try_from(c.len_utf16()).ok()?;
}
if utf16_col == character {
Pos::try_from(line_start + line_src.len()).ok()
} else {
None
}
}
#[cfg(feature = "utf16")]
#[must_use]
pub fn line_col_utf16_to_byte_clamped(
&self,
source: &str,
line: u32,
character: u32,
) -> Option<Pos> {
if let Some(p) = self.line_col_utf16_to_byte(source, line, character) {
return Some(p);
}
let line_start = self.line_start(line) as usize;
let source_len = source.len();
let line_span = self.line_range(line, source_len);
let line_end = line_span.end as usize;
let line_src = source.get(line_start..line_end)?;
Pos::try_from(line_start + line_src.len()).ok()
}
#[cfg(feature = "utf16")]
#[must_use]
pub fn line_prefix_utf16(&self, source: &str, line: u32, character: u32) -> Option<String> {
let line_start = self.line_start(line) as usize;
let line_span = self.line_range(line, source.len());
let line_end = line_span.end as usize;
let line_src = source.get(line_start..line_end)?;
let mut utf16_count = 0u32;
let mut end = line_src.len();
for (i, c) in line_src.char_indices() {
if utf16_count >= character {
end = i;
break;
}
utf16_count += u32::try_from(c.len_utf16()).unwrap_or(0);
}
Some(line_src[..end].to_string())
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A buffer without any newline is a single line whose columns equal byte offsets.
    #[test]
    fn line_index_single_line() {
        let index = LineIndex::new(b"hello");
        assert_eq!(index.line_count(), 1);
        for (offset, expected) in [(0, (0, 0)), (3, (0, 3))] {
            assert_eq!(index.line_col(offset), expected);
        }
        assert_eq!(index.line_col_1based(3), (1, 4));
    }

    /// Offsets right after a `\n` belong to the following line at column 0.
    #[test]
    fn line_index_multiline() {
        let index = LineIndex::new(b"a\nbb\nccc");
        assert_eq!(index.line_count(), 3);
        for (offset, expected) in [(0, (0, 0)), (1, (0, 1)), (2, (1, 0))] {
            assert_eq!(index.line_col(offset), expected);
        }
        let (line, _) = index.line_col(5);
        assert_eq!(line, 2, "offset 5 should be on line 2 (third line)");
        assert_eq!(index.line_col(6), (2, 1));
    }

    /// The rendered snippet shows the source line and a caret marker.
    #[test]
    fn snippet_at() {
        let source: &[u8] = b"let x = 1\n foo";
        let rendered = LineIndex::new(source).snippet_at(source, 7);
        assert!(rendered.contains("let x = 1"));
        assert!(rendered.contains('^'));
    }

    /// A column past the end of the line fails the strict conversion but not the clamped one.
    #[cfg(feature = "utf16")]
    #[test]
    fn utf16_to_byte_out_of_range_returns_none() {
        let text = "abc";
        let index = LineIndex::new(text.as_bytes());
        assert!(index.line_col_utf16_to_byte(text, 0, 4).is_none());
        assert!(index.line_col_utf16_to_byte_clamped(text, 0, 4).is_some());
    }

    /// An emoji takes two UTF-16 code units; columns on either side of it map to
    /// character boundaries in the UTF-8 text.
    #[cfg(feature = "utf16")]
    #[test]
    fn utf16_to_byte_emoji_boundaries() {
        let text = "x\u{1F600}y";
        let index = LineIndex::new(text.as_bytes());
        let byte_at =
            |character: u32| index.line_col_utf16_to_byte(text, 0, character).unwrap() as usize;
        assert_eq!(&text[byte_at(0)..], text);
        assert_eq!(&text[byte_at(1)..], "\u{1F600}y");
        assert_eq!(&text[byte_at(3)..], "y");
        assert_eq!(byte_at(4), text.len());
    }
}